From c78ae2f55f7dbc2d88a11ae481a477b7eeeeddae Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 7 Jun 2024 10:46:30 -0400 Subject: [PATCH 01/15] Adds binary 1.1 roundtrip unit tests, `has_annotations` no longer invokes `annotations` --- src/lazy/encoder/binary/v1_1/value_writer.rs | 114 ++++++++++++++++++- src/lazy/expanded/mod.rs | 9 ++ src/lazy/value.rs | 14 ++- 3 files changed, 131 insertions(+), 6 deletions(-) diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 73deb58e..945efaf7 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -832,6 +832,7 @@ impl<'value, 'top> BinaryAnnotatedValueWriter_1_1<'value, 'top> { #[cfg(test)] mod tests { + use crate::ion_data::IonEq; use crate::lazy::encoder::annotate::{Annotatable, Annotated}; use crate::lazy::encoder::annotation_seq::AnnotationSeq; use crate::lazy::encoder::binary::v1_1::writer::LazyRawBinaryWriter_1_1; @@ -841,9 +842,11 @@ mod tests { use crate::raw_symbol_ref::AsRawSymbolRef; use crate::types::float::{FloatRepr, SmallestFloatRepr}; use crate::{ - Decimal, Element, Int, IonResult, IonType, Null, RawSymbolRef, SymbolId, Timestamp, + v1_1, Decimal, Element, Int, IonResult, IonType, Null, RawSymbolRef, SymbolId, Timestamp, + Writer, }; use num_traits::FloatConst; + use rstest::rstest; fn encoding_test( test: impl FnOnce(&mut LazyRawBinaryWriter_1_1<&mut Vec>) -> IonResult<()>, @@ -2797,4 +2800,113 @@ mod tests { )?; Ok(()) } + + #[rstest] + #[case::boolean("true false")] + #[case::int("1 2 3 4 5")] + #[case::float("2.5e0 -2.5e0 100.2e0 -100.2e0")] + // `nan` breaks this + // #[case::float_special("+inf -inf nan")] + #[case::decimal("2.5 -2.5 100.2 -100.2")] + #[case::decimal_zero("0. 
0d0 -0d0 -0.0")] + #[case::timestamp_unknown_offset( + r#" + 2024T + 2024-06T + 2024-06-07 + 2024-06-07T10:06-00:00 + 2024-06-07T10:06:30-00:00 + 2024-06-07T10:06:30.333-00:00 + "# + )] + #[case::timestamp_utc( + r#" + 2024-06-07T10:06Z + 2024-06-07T10:06+00:00 + 2024-06-07T10:06:30Z + 2024-06-07T10:06:30+00:00 + 2024-06-07T10:06:30.333Z + 2024-06-07T10:06:30.333+00:00 + "# + )] + // Offset gets mangled during round-tripping + // #[case::timestamp_known_offset( + // r#" + // 2024-06-07T10:06+02:00 + // 2024-06-07T10:06+01:00 + // 2024-06-07T10:06-05:00 + // 2024-06-07T10:06-08:00 + // 2024-06-07T10:06:30+02:00 + // 2024-06-07T10:06:30+01:00 + // 2024-06-07T10:06:30-05:00 + // 2024-06-07T10:06:30-08:00 + // 2024-06-07T10:06:30.333+02:00 + // 2024-06-07T10:06:30.333+01:00 + // 2024-06-07T10:06:30.333-05:00 + // 2024-06-07T10:06:30.333-08:00 + // "# + // )] + #[case::string( + r#" + "" + "hello" + "안녕하세요" + "⚛️" + "# + )] + // Requires annotations to create a symbol table + // #[case::symbol( + // r#" + // foo + // 'bar baz' + // "# + // )] + #[case::symbol_unknown_text("$0")] + #[case::blob("{{}} {{aGVsbG8=}}")] + #[case::clob(r#"{{""}} {{"hello"}}"#)] + #[case::list( + r#" + [] + [1, 2, 3] + [1, [2, 3], 4] + "# + )] + #[case::sexp( + r#" + () + (1 2 3) + (1 (2 3) 4) + "# + )] + // Requires annotations to create a symbol table + // #[case::struct_( + // r#" + // {} + // {a: 1, b: 2, c: 3} + // {a: 1, b: {c: 2, d: 3}, e: 4} + // "# + // )] + fn roundtripping(#[case] ion_data_1_0: &str) -> IonResult<()> { + let original_sequence = Element::read_all(ion_data_1_0)?; + let mut writer = Writer::new(v1_1::Binary, Vec::new())?; + writer.write_all(&original_sequence)?; + let binary_data_1_1 = writer.close()?; + let output_sequence = Element::read_all(binary_data_1_1)?; + assert_eq!( + original_sequence, + output_sequence, + "(original, after roundtrip)\n{}", + original_sequence.iter().zip(output_sequence.iter()).fold( + String::new(), + |mut text, (before, after)| { + use 
std::fmt::Write; + let is_eq = before.ion_eq(after); + let flag = if is_eq { "" } else { "<- not IonEq" }; + writeln!(&mut text, "({}, {}) {}", before, after, flag).unwrap(); + text + } + ) + ); + Ok(()) + } } diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index 8dc5712b..b6fa9eb9 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -633,6 +633,15 @@ impl<'top, Encoding: Decoder> LazyExpandedValue<'top, Encoding> { } } + pub fn has_annotations(&self) -> bool { + use ExpandedValueSource::*; + match &self.source { + ValueLiteral(value) => value.has_annotations(), + Template(_, element) => !element.annotations().is_empty(), + Constructed(annotations, _) => !annotations.is_empty(), + } + } + pub fn annotations(&self) -> ExpandedAnnotationsIterator<'top, Encoding> { use ExpandedValueSource::*; match &self.source { diff --git a/src/lazy/value.rs b/src/lazy/value.rs index 38ba1d06..02947d3f 100644 --- a/src/lazy/value.rs +++ b/src/lazy/value.rs @@ -226,7 +226,7 @@ impl<'top, D: Decoder> LazyValue<'top, D> { } pub fn has_annotations(&self) -> bool { - self.expanded_value.annotations().next().is_some() + self.expanded_value.has_annotations() } /// Reads the body of this value (that is: its data) and returns it as a [`ValueRef`]. 
@@ -310,10 +310,14 @@ impl<'top, D: Decoder> LazyValue<'top, D> { impl<'top, D: Decoder> TryFrom> for Element { type Error = IonError; - fn try_from(value: LazyValue<'top, D>) -> Result { - let annotations: Annotations = value.annotations().try_into()?; - let value: Value = value.read()?.try_into()?; - Ok(value.with_annotations(annotations)) + fn try_from(lazy_value: LazyValue<'top, D>) -> Result { + let value: Value = lazy_value.read()?.try_into()?; + if lazy_value.has_annotations() { + let annotations: Annotations = lazy_value.annotations().try_into()?; + Ok(value.with_annotations(annotations)) + } else { + Ok(value.into()) + } } } From 344826cbfe7cd34a513b3afa0912cf18ed655da5 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 7 Jun 2024 15:52:28 -0400 Subject: [PATCH 02/15] Implements reading binary FlexSym annotations --- src/lazy/binary/encoded_value.rs | 8 +- src/lazy/binary/immutable_buffer.rs | 4 +- .../binary/raw/v1_1/annotations_iterator.rs | 42 +++++- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 122 +++++++++++++++--- src/lazy/binary/raw/v1_1/struct.rs | 2 +- src/lazy/binary/raw/v1_1/type_descriptor.rs | 8 +- src/lazy/binary/raw/v1_1/value.rs | 36 ++---- src/lazy/encoder/binary/v1_1/flex_sym.rs | 4 +- src/lazy/encoder/binary/v1_1/value_writer.rs | 87 ++++++++++--- 9 files changed, 245 insertions(+), 68 deletions(-) diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index ba3ce24d..fba9d50e 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -1,4 +1,5 @@ use crate::lazy::binary::raw::type_descriptor::Header; +use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; use crate::IonType; use std::ops::Range; @@ -77,7 +78,10 @@ pub(crate) struct EncodedValue { // sequence itself. pub annotations_header_length: u8, // The number of bytes used to encode the series of symbol IDs inside the annotations wrapper. 
- pub annotations_sequence_length: u8, + pub annotations_sequence_length: u16, + // Whether the annotations sequence is encoded as `FlexSym`s or as symbol addresses. + // In Ion 1.0, they are always encoded as symbol addresses. + pub annotations_encoding: AnnotationsEncoding, // The offset of the type descriptor byte within the overall input stream. pub header_offset: usize, // The number of bytes used to encode the optional length VarUInt following the header byte. @@ -237,6 +241,7 @@ mod tests { use crate::binary::IonTypeCode; use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::type_descriptor::Header; + use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; use crate::{IonResult, IonType}; #[test] @@ -250,6 +255,7 @@ mod tests { }, annotations_header_length: 3, annotations_sequence_length: 1, + annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset: 200, length_length: 0, value_body_length: 3, diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs index 6db71279..7b1c4d6f 100644 --- a/src/lazy/binary/immutable_buffer.rs +++ b/src/lazy/binary/immutable_buffer.rs @@ -10,6 +10,7 @@ use crate::binary::var_uint::VarUInt; use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::r#struct::LazyRawBinaryFieldName_1_0; use crate::lazy::binary::raw::type_descriptor::{Header, TypeDescriptor, ION_1_0_TYPE_DESCRIPTORS}; +use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; use crate::lazy::binary::raw::value::{LazyRawBinaryValue_1_0, LazyRawBinaryVersionMarker_1_0}; use crate::lazy::decoder::LazyRawFieldExpr; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; @@ -704,6 +705,7 @@ impl<'a> ImmutableBuffer<'a> { // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, + annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset, 
length_length, value_body_length: value_length, @@ -745,7 +747,7 @@ impl<'a> ImmutableBuffer<'a> { } lazy_value.encoded_value.annotations_header_length = wrapper.header_length; - lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length; + lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length as u16; lazy_value.encoded_value.total_length += wrapper.header_length as usize; // Modify the input to include the annotations lazy_value.input = input; diff --git a/src/lazy/binary/raw/v1_1/annotations_iterator.rs b/src/lazy/binary/raw/v1_1/annotations_iterator.rs index f9d5275e..5e10fea5 100644 --- a/src/lazy/binary/raw/v1_1/annotations_iterator.rs +++ b/src/lazy/binary/raw/v1_1/annotations_iterator.rs @@ -1,16 +1,21 @@ #![allow(non_camel_case_types)] -use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; -use crate::{IonResult, RawSymbolRef}; +use crate::lazy::binary::raw::v1_1::immutable_buffer::{AnnotationsEncoding, ImmutableBuffer}; +use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSymValue; +use crate::{IonResult, RawSymbolRef, SymbolId}; /// Iterates over a slice of bytes, lazily reading them as a sequence of FlexUInt- or /// FlexSym-encoded symbol IDs. 
pub struct RawBinaryAnnotationsIterator_1_1<'a> { buffer: ImmutableBuffer<'a>, + encoding: AnnotationsEncoding, } impl<'a> RawBinaryAnnotationsIterator_1_1<'a> { - pub(crate) fn new(buffer: ImmutableBuffer<'a>) -> RawBinaryAnnotationsIterator_1_1<'a> { - Self { buffer } + pub(crate) fn new( + buffer: ImmutableBuffer<'a>, + encoding: AnnotationsEncoding, + ) -> RawBinaryAnnotationsIterator_1_1<'a> { + Self { buffer, encoding } } } @@ -18,6 +23,33 @@ impl<'a> Iterator for RawBinaryAnnotationsIterator_1_1<'a> { type Item = IonResult>; fn next(&mut self) -> Option { - todo!() + if self.buffer.is_empty() { + return None; + } + use AnnotationsEncoding::*; + let (raw_symbol, remaining_input) = match self.encoding { + SymbolAddress => match self.buffer.read_flex_uint() { + Ok((flex_uint, remaining_input)) => ( + RawSymbolRef::SymbolId(flex_uint.value() as SymbolId), + remaining_input, + ), + Err(error) => return Some(Err(error)), + }, + FlexSym => { + let (flex_sym, remaining_input) = match self.buffer.read_flex_sym() { + Ok((flex_sym, remaining_input)) => (flex_sym, remaining_input), + Err(error) => return Some(Err(error)), + }; + let raw_symbol = match flex_sym.value() { + FlexSymValue::SymbolRef(raw_symbol) => raw_symbol, + FlexSymValue::Opcode(_) => { + todo!("FlexSym escapes in annotation sequences") + } + }; + (raw_symbol, remaining_input) + } + }; + self.buffer = remaining_input; + Some(Ok(raw_symbol)) } } diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 58abb45f..1cfdab9a 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -3,7 +3,7 @@ use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::v1_1::value::{ LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1, }; -use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, ION_1_1_OPCODES}; +use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, 
OpcodeType, ION_1_1_OPCODES}; use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt; use crate::lazy::encoder::binary::v1_1::fixed_uint::FixedUInt; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; @@ -173,12 +173,6 @@ impl<'a> ImmutableBuffer<'a> { Ok((flex_sym, remaining)) } - /// Attempts to decode an annotations wrapper at the beginning of the buffer and returning - /// its subfields in an [`AnnotationsWrapper`]. - pub fn read_annotations_wrapper(&self, _opcode: Opcode) -> ParseResult<'a, AnnotationsWrapper> { - todo!(); - } - /// Reads a `NOP` encoding primitive from the buffer. If it is successful, returns an `Ok(_)` /// containing the number of bytes that were consumed. /// @@ -278,7 +272,7 @@ impl<'a> ImmutableBuffer<'a> { /// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that /// the next byte (`type_descriptor`) is not a NOP. pub fn read_value(self, opcode: Opcode) -> IonResult> { - if opcode.is_annotation_wrapper() { + if opcode.is_annotations_sequence() { self.read_annotated_value(opcode) } else { self.read_value_without_annotations(opcode) @@ -309,6 +303,7 @@ impl<'a> ImmutableBuffer<'a> { // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, + annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset, length_length, value_body_length: value_length, @@ -340,19 +335,114 @@ impl<'a> ImmutableBuffer<'a> { /// Reads an annotations wrapper and its associated value from the buffer. The caller must confirm /// that the next byte in the buffer (`type_descriptor`) begins an annotations wrapper. 
- fn read_annotated_value( + fn read_annotated_value(self, opcode: Opcode) -> IonResult> { + let (annotations_seq, input_after_annotations) = self.read_annotations_sequence(opcode)?; + let opcode = input_after_annotations.peek_opcode()?; + let mut value = input_after_annotations.read_value_without_annotations(opcode)?; + value.encoded_value.annotations_header_length = annotations_seq.header_length; + value.encoded_value.annotations_sequence_length = annotations_seq.sequence_length; + value.encoded_value.annotations_encoding = annotations_seq.encoding; + value.encoded_value.total_length += + annotations_seq.header_length as usize + annotations_seq.sequence_length as usize; + // Rewind the input to include the annotations sequence + value.input = self; + Ok(value) + } + + fn read_annotations_sequence(self, opcode: Opcode) -> ParseResult<'a, EncodedAnnotations> { + match opcode.opcode_type { + OpcodeType::AnnotationFlexSym => self.read_flex_sym_annotations_sequence(opcode), + OpcodeType::SymbolAddress => self.read_symbol_address_annotations_sequence(opcode), + _ => unreachable!("read_annotations_sequence called for non-annotations opcode"), + } + } + + fn read_flex_sym_annotations_sequence( self, - mut _type_descriptor: Opcode, - ) -> IonResult> { - todo!(); + opcode: Opcode, + ) -> ParseResult<'a, EncodedAnnotations> { + let input_after_opcode = self.consume(1); + // TODO: This implementation actively reads the annotations, which isn't necessary. + // At this phase of parsing we can just identify the buffer slice that contains + // the annotations and remember their encoding; later on, the annotations iterator + // can actually do the reading. That optimization would be impactful for FlexSyms + // that represent inline text. 
+ let (sequence, remaining_input) = match opcode.length_code { + 7 => { + let (flex_sym, remaining_input) = input_after_opcode.read_flex_sym()?; + let sequence = EncodedAnnotations { + encoding: AnnotationsEncoding::FlexSym, + header_length: 1, // 0xE7 + sequence_length: u16::try_from(flex_sym.size_in_bytes()).map_err(|_| { + IonError::decoding_error( + "the maximum supported annotations sequence length is 65KB.", + ) + })?, + }; + (sequence, remaining_input) + } + 8 => { + let (flex_sym1, input_after_sym1) = input_after_opcode.read_flex_sym()?; + let (flex_sym2, input_after_sym2) = input_after_sym1.read_flex_sym()?; + let combined_length = flex_sym1.size_in_bytes() + flex_sym2.size_in_bytes(); + let sequence = EncodedAnnotations { + encoding: AnnotationsEncoding::FlexSym, + header_length: 1, // 0xE8 + sequence_length: u16::try_from(combined_length).map_err(|_| { + IonError::decoding_error( + "the maximum supported annotations sequence length is 65KB.", + ) + })?, + }; + (sequence, input_after_sym2) + } + 9 => { + let (flex_uint, remaining_input) = input_after_opcode.read_flex_uint()?; + let sequence = EncodedAnnotations { + encoding: AnnotationsEncoding::FlexSym, + header_length: u8::try_from(1 + flex_uint.size_in_bytes()).map_err(|_| { + IonError::decoding_error("found a 256+ byte annotations header") + })?, + sequence_length: u16::try_from(flex_uint.value()).map_err(|_| { + IonError::decoding_error( + "the maximum supported annotations sequence length is 65KB.", + ) + })?, + }; + ( + sequence, + remaining_input.consume(sequence.sequence_length as usize), + ) + } + _ => unreachable!("reading flexsym annotations sequence with invalid length code"), + }; + Ok((sequence, remaining_input)) } + + fn read_symbol_address_annotations_sequence( + self, + _opcode: Opcode, + ) -> ParseResult<'a, EncodedAnnotations> { + todo!() + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum AnnotationsEncoding { + SymbolAddress, + FlexSym, } -/// Represents the data found in an 
Ion 1.0 annotations wrapper. -pub struct AnnotationsWrapper { +/// Represents the data found in an Ion 1.1 annotations sequence +#[derive(Clone, Copy, Debug)] +pub struct EncodedAnnotations { + pub encoding: AnnotationsEncoding, + // The number of bytes used to represent the annotations opcode and the byte length prefix + // (in the case of 0xE9). As a result, this will almost always be 1 or 2. pub header_length: u8, - pub sequence_length: u8, - pub expected_value_length: usize, + // The number of bytes used to represent the annotations sequence itself. Because these + // can be encoded with inline text, it's possible for the length to be non-trivial. + pub sequence_length: u16, } #[cfg(test)] diff --git a/src/lazy/binary/raw/v1_1/struct.rs b/src/lazy/binary/raw/v1_1/struct.rs index 8e7dec19..207d3eb7 100644 --- a/src/lazy/binary/raw/v1_1/struct.rs +++ b/src/lazy/binary/raw/v1_1/struct.rs @@ -140,7 +140,7 @@ impl<'top> RawBinaryStructIterator_1_1<'top> { bytes_to_skip: 0, struct_type: match opcode_type { // TODO: Delimited struct handling - OpcodeType::Struct => StructType::FlexSym, + OpcodeType::Struct => StructType::SymbolAddress, _ => unreachable!("Unexpected opcode for structure"), }, } diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 3e2fec3a..90f982ca 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -68,6 +68,7 @@ impl Opcode { (0xD, _) => (Struct, low_nibble, Some(IonType::Struct)), (0xE, 0x0) => (IonVersionMarker, low_nibble, None), (0xE, 0x1..=0x3) => (SymbolAddress, low_nibble, Some(IonType::Symbol)), + (0xE, 0x7..=0x9) => (AnnotationFlexSym, low_nibble, None), (0xE, 0xA) => (NullNull, low_nibble, Some(IonType::Null)), (0xE, 0xB) => (TypedNull, low_nibble, Some(IonType::Null)), (0xE, 0xC..=0xD) => (Nop, low_nibble, None), @@ -102,8 +103,11 @@ impl Opcode { self.opcode_type == OpcodeType::IonVersionMarker } - pub fn 
is_annotation_wrapper(&self) -> bool { - false + pub fn is_annotations_sequence(&self) -> bool { + match self.opcode_type { + OpcodeType::AnnotationSymAddress | OpcodeType::AnnotationFlexSym => true, + _ => false, + } } #[inline] diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index bde44125..e3874563 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -158,30 +158,16 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Returns an `ImmutableBuffer` that contains the bytes comprising this value's encoded /// annotations sequence. fn annotations_sequence(&self) -> ImmutableBuffer<'top> { - let offset_and_length = self - .encoded_value - .annotations_sequence_offset() - .map(|offset| { - ( - offset, - self.encoded_value.annotations_sequence_length().unwrap(), - ) - }); - let (sequence_offset, sequence_length) = match offset_and_length { - None => { - // If there are no annotations, return an empty slice starting at the opcode. - return self.input.slice(0, 0); - } - Some(offset_and_length) => offset_and_length, - }; - let local_sequence_offset = sequence_offset - self.input.offset(); - - self.input.slice(local_sequence_offset, sequence_length) + let sequence = self.input.slice( + self.encoded_value.annotations_header_length as usize, + self.encoded_value.annotations_sequence_length as usize + ); + sequence } /// Returns an iterator over this value's unresolved annotation symbols. pub fn annotations(&self) -> RawBinaryAnnotationsIterator_1_1<'top> { - RawBinaryAnnotationsIterator_1_1::new(self.annotations_sequence()) + RawBinaryAnnotationsIterator_1_1::new(self.annotations_sequence(), self.encoded_value.annotations_encoding) } /// Reads this value's data, returning it as a [`RawValueRef`]. If this value is a container, @@ -217,7 +203,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { } /// Returns the encoded byte slice representing this value's data. 
- fn value_body(&self) -> IonResult<&'top [u8]> { + pub(crate) fn value_body(&self) -> IonResult<&'top [u8]> { let value_total_length = self.encoded_value.total_length(); if self.input.len() < value_total_length { return IonResult::incomplete( @@ -266,7 +252,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { (OpcodeType::Integer, 0x0) => 0.into(), (OpcodeType::Integer, n) => { // We have n bytes following that make up our integer. - self.input.consume(1).read_fixed_int(n)?.0.into() + self.available_body().read_fixed_int(n)?.0.into() } (OpcodeType::LargeInteger, 0x6) => { // We have a FlexUInt size, then big int. @@ -285,14 +271,14 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { let value = match self.encoded_value.value_body_length { 8 => { let mut buffer = [0; 8]; - let val_bytes = self.input.bytes_range(1, 8); + let val_bytes = self.available_body().bytes_range(0, 8); buffer[..8].copy_from_slice(val_bytes); f64::from_le_bytes(buffer) } 4 => { let mut buffer = [0; 4]; - let val_bytes = self.input.bytes_range(1, 4); + let val_bytes = self.available_body().bytes_range(0, 4); buffer[..4].copy_from_slice(val_bytes); f32::from_le_bytes(buffer).into() @@ -650,7 +636,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { let biases: [usize; 3] = [0, 256, 65792]; let length_code = self.encoded_value.header.length_code; if (1..=3).contains(&length_code) { - let (id, _) = self.input.consume(1).read_fixed_uint(length_code.into())?; + let (id, _) = self.available_body().read_fixed_uint(length_code.into())?; let id = usize::try_from(id.value())?; Ok(id + biases[(length_code - 1) as usize]) } else { diff --git a/src/lazy/encoder/binary/v1_1/flex_sym.rs b/src/lazy/encoder/binary/v1_1/flex_sym.rs index ef89b9fa..f610562d 100644 --- a/src/lazy/encoder/binary/v1_1/flex_sym.rs +++ b/src/lazy/encoder/binary/v1_1/flex_sym.rs @@ -67,7 +67,7 @@ impl<'top> FlexSym<'top> { let sym_value = value.value(); let (flex_sym_value, size_in_bytes) = match sym_value.cmp(&0) { Ordering::Greater => ( - 
FlexSymValue::SymbolRef(RawSymbolRef::SymbolId(sym_value as usize)), + FlexSymValue::SymbolRef(SymbolId(sym_value as usize)), value.size_in_bytes(), ), Ordering::Less => { @@ -76,7 +76,7 @@ impl<'top> FlexSym<'top> { let text = std::str::from_utf8(&input[flex_int_len..flex_int_len + len]).map_err( |_| IonError::decoding_error("found FlexSym with invalid UTF-8 data"), )?; - let symbol_ref = RawSymbolRef::Text(text); + let symbol_ref = Text(text); (FlexSymValue::SymbolRef(symbol_ref), flex_int_len + len) } Ordering::Equal => ( diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 945efaf7..d8dbb53c 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -2804,11 +2804,14 @@ mod tests { #[rstest] #[case::boolean("true false")] #[case::int("1 2 3 4 5")] + #[case::annotated_int("foo::1 bar::baz::2 quux::quuz::waldo::3")] #[case::float("2.5e0 -2.5e0 100.2e0 -100.2e0")] + #[case::annotated_float("foo::2.5e0 bar::baz::-2.5e0 quux::quuz::waldo::100.2e0")] // `nan` breaks this // #[case::float_special("+inf -inf nan")] #[case::decimal("2.5 -2.5 100.2 -100.2")] #[case::decimal_zero("0. 
0d0 -0d0 -0.0")] + #[case::annotated_decimal("foo::2.5 bar::baz::-2.5 quux::quuz::waldo::100.2")] #[case::timestamp_unknown_offset( r#" 2024T @@ -2846,6 +2849,13 @@ mod tests { // 2024-06-07T10:06:30.333-08:00 // "# // )] + #[case::annotated_timestamp( + r#" + foo::2024T + bar::baz::2024-06T + quux::quuz::waldo::2024-06-07T + "# + )] #[case::string( r#" "" @@ -2854,16 +2864,43 @@ mod tests { "⚛️" "# )] - // Requires annotations to create a symbol table - // #[case::symbol( - // r#" - // foo - // 'bar baz' - // "# - // )] + #[case::annotated_string( + r#" + foo::"" + bar::baz::"안녕하세요" + quux::quuz::waldo::"⚛️" + "# + )] + #[case::symbol( + r#" + foo + 'bar baz' + "# + )] + #[case::annotated_symbol( + r#" + foo::Earth + bar::baz::Mars + quux::quuz::waldo::Jupiter + "# + )] #[case::symbol_unknown_text("$0")] #[case::blob("{{}} {{aGVsbG8=}}")] + #[case::annotated_blob( + r#" + foo::{{}} + bar::baz::{{aGVsbG8=}} + quux::quuz::waldo::{{aGVsbG8=}} + "# + )] #[case::clob(r#"{{""}} {{"hello"}}"#)] + #[case::annotated_clob( + r#" + foo::{{""}} + bar::baz::{{"hello"}} + quux::quuz::waldo::{{"world"}} + "# + )] #[case::list( r#" [] @@ -2871,6 +2908,13 @@ mod tests { [1, [2, 3], 4] "# )] + #[case::annotated_list( + r#" + foo::[] + bar::baz::[1, 2, 3] + quux::quuz::waldo::[1, nested::[2, 3], 4] + "# + )] #[case::sexp( r#" () @@ -2878,14 +2922,27 @@ mod tests { (1 (2 3) 4) "# )] - // Requires annotations to create a symbol table - // #[case::struct_( - // r#" - // {} - // {a: 1, b: 2, c: 3} - // {a: 1, b: {c: 2, d: 3}, e: 4} - // "# - // )] + #[case::annotated_sexp( + r#" + foo::() + bar::baz::(1 2 3) + quux::quuz::waldo::(1 nested::(2 3) 4) + "# + )] + #[case::struct_( + r#" + {} + {a: 1, b: 2, c: 3} + {a: 1, b: {c: 2, d: 3}, e: 4} + "# + )] + #[case::annotated_struct( + r#" + foo::{} + bar::baz::{a: 1, b: 2, c: 3} + quux::quuz::waldo::{a: 1, b: nested::{c: 2, d: 3}, e: 4} + "# + )] fn roundtripping(#[case] ion_data_1_0: &str) -> IonResult<()> { let original_sequence = 
Element::read_all(ion_data_1_0)?; let mut writer = Writer::new(v1_1::Binary, Vec::new())?; From 8b08c4df815b3d31a9058b330a701db0f1710f9b Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 7 Jun 2024 16:26:34 -0400 Subject: [PATCH 03/15] Fixes timestamp decoding of offsets --- src/lazy/binary/raw/v1_1/reader.rs | 22 ++++----- src/lazy/binary/raw/v1_1/value.rs | 11 ++--- src/lazy/encoder/binary/v1_1/value_writer.rs | 48 ++++++++++---------- 3 files changed, 40 insertions(+), 41 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index b157ffac..200a15d6 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -536,17 +536,17 @@ mod tests { #[case("2024T", &[0x80, 0x36])] #[case("2023-10T", &[0x81, 0x35, 0x05])] #[case("2023-10-15T", &[0x82, 0x35, 0x7D])] - #[case("2023-10-15T05:04Z", &[0x83, 0x35, 0x7D, 0x85, 0x08])] - #[case("2023-10-15T05:04:03Z", &[0x84, 0x35, 0x7D, 0x85, 0x38, 0x00])] - #[case("2023-10-15T05:04:03.123-00:00", &[0x85, 0x35, 0x7D, 0x85, 0x30, 0xEC, 0x01])] - #[case("2023-10-15T05:04:03.000123-00:00", &[0x86, 0x35, 0x7D, 0x85, 0x30, 0xEC, 0x01, 0x00])] - #[case("2023-10-15T05:04:03.000000123-00:00", &[0x87, 0x35, 0x7D, 0x85, 0x30, 0xEC, 0x01, 0x00, 0x00])] - #[case("2023-10-15T05:04+01:00", &[0x88, 0x35, 0x7D, 0x85, 0x20, 0x00])] - #[case("2023-10-15T05:04-01:00", &[0x88, 0x35, 0x7D, 0x85, 0xE0, 0x03])] - #[case("2023-10-15T05:04:03+01:00", &[0x89, 0x35, 0x7D, 0x85, 0x20, 0x0C])] - #[case("2023-10-15T05:04:03.123+01:00", &[0x8A, 0x35, 0x7D, 0x85, 0x20, 0x0C, 0x7B, 0x00])] - #[case("2023-10-15T05:04:03.000123+01:00", &[0x8B, 0x35, 0x7D, 0x85, 0x20, 0x0C, 0x7B, 0x00, 0x00])] - #[case("2023-10-15T05:04:03.000000123+01:00", &[0x8C, 0x35, 0x7D, 0x85, 0x20, 0x0C, 0x7B, 0x00, 0x00, 0x00])] + #[case("2023-10-15T05:04Z", &[0x83, 0x35, 0x7D, 0x85, 0x00])] + #[case("2023-10-15T05:04:03Z", &[0x84, 0x35, 0x7D, 0x85, 0x30, 0x00])] + #[case("2023-10-15T05:04:03.123-00:00", &[0x85, 
0x35, 0x7D, 0x85, 0x38, 0xEC, 0x01])] + #[case("2023-10-15T05:04:03.000123-00:00", &[0x86, 0x35, 0x7D, 0x85, 0x38, 0xEC, 0x01, 0x00])] + #[case("2023-10-15T05:04:03.000000123-00:00", &[0x87, 0x35, 0x7D, 0x85, 0x38, 0xEC, 0x01, 0x00, 0x00])] + #[case("2023-10-15T05:04+01:00", &[0x88, 0x35, 0x7D, 0x85, 0xE0, 0x01])] + #[case("2023-10-15T05:04-01:00", &[0x88, 0x35, 0x7D, 0x85, 0xA0, 0x01])] + #[case("2023-10-15T05:04:03+01:00", &[0x89, 0x35, 0x7D, 0x85, 0xE0, 0x0D])] + #[case("2023-10-15T05:04:03.123+01:00", &[0x8A, 0x35, 0x7D, 0x85, 0xE0, 0x0D, 0x7B, 0x00])] + #[case("2023-10-15T05:04:03.000123+01:00", &[0x8B, 0x35, 0x7D, 0x85, 0xE0, 0x0D, 0x7B, 0x00, 0x00])] + #[case("2023-10-15T05:04:03.000000123+01:00", &[0x8C, 0x35, 0x7D, 0x85, 0xE0, 0x0D, 0x7B, 0x00, 0x00, 0x00])] fn timestamps_short(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index e3874563..4380af15 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -320,7 +320,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { Ok(RawValueRef::Decimal(decimal)) } - // Helper method callsed by [`Self::read_timestamp_short`]. Reads the time information from a + // Helper method called by [`Self::read_timestamp_short`]. Reads the time information from a // timestamp with Unknown or UTC offset. 
fn read_timestamp_short_no_offset_after_minute( &self, @@ -332,7 +332,8 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { const MICROSECONDS_MASK_32BIT: u32 = 0x3F_FF_FC_00; let length_code = self.encoded_value.header.length_code(); - let is_utc = (value_bytes[3] & 0x08) == 0x08; + // An offset bit of `0` indicates UTC while a `1` indicates 'unknown' + let is_utc = (value_bytes[3] & 0x08) == 0; // Hour & Minute (populated from [`Self::read_timestamp_short`]), just need to know if UTC. if length_code == 3 { @@ -422,10 +423,8 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { // Read offset as 15min multiple let offset: u16 = u16::from_le_bytes(value_bytes[3..=4].try_into().unwrap()) .extract_bitmask(OFFSET_MASK_16BIT); - // The 7th bit is our sign bit, below we extend it through the rest of the i32, and - // multiply by 15 to get the number of minutes. - // https://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtend - let offset: i32 = 15 * (offset as i32 ^ 0x040).wrapping_sub(0x040); + const MIN_OFFSET: i32 = -14 * 60; // Western hemisphere, -14:00 + let offset: i32 = ((offset as i32) * 15) + MIN_OFFSET; // Hour and Minutes at known offset if length_code == 8 { diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index d8dbb53c..0ef88f0a 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -255,8 +255,8 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { pub fn write_timestamp(mut self, value: &Timestamp) -> IonResult<()> { use crate::TimestampPrecision::*; - const MIN_OFFSET: i32 = -14 * 60; // Western hemisphere, 14:00 - const MAX_OFFSET: i32 = 14 * 60; // Eastern hemisphere, -14:00 + const MIN_OFFSET: i32 = -14 * 60; // Western hemisphere, -14:00 + const MAX_OFFSET: i32 = 14 * 60; // Eastern hemisphere, 14:00 const SHORT_FORM_OFFSET_RANGE: std::ops::RangeInclusive = MIN_OFFSET..=MAX_OFFSET; let precision = value.precision(); @@ -2807,8 +2807,7 
@@ mod tests { #[case::annotated_int("foo::1 bar::baz::2 quux::quuz::waldo::3")] #[case::float("2.5e0 -2.5e0 100.2e0 -100.2e0")] #[case::annotated_float("foo::2.5e0 bar::baz::-2.5e0 quux::quuz::waldo::100.2e0")] - // `nan` breaks this - // #[case::float_special("+inf -inf nan")] + #[case::float_special("+inf -inf nan")] #[case::decimal("2.5 -2.5 100.2 -100.2")] #[case::decimal_zero("0. 0d0 -0d0 -0.0")] #[case::annotated_decimal("foo::2.5 bar::baz::-2.5 quux::quuz::waldo::100.2")] @@ -2832,23 +2831,22 @@ mod tests { 2024-06-07T10:06:30.333+00:00 "# )] - // Offset gets mangled during round-tripping - // #[case::timestamp_known_offset( - // r#" - // 2024-06-07T10:06+02:00 - // 2024-06-07T10:06+01:00 - // 2024-06-07T10:06-05:00 - // 2024-06-07T10:06-08:00 - // 2024-06-07T10:06:30+02:00 - // 2024-06-07T10:06:30+01:00 - // 2024-06-07T10:06:30-05:00 - // 2024-06-07T10:06:30-08:00 - // 2024-06-07T10:06:30.333+02:00 - // 2024-06-07T10:06:30.333+01:00 - // 2024-06-07T10:06:30.333-05:00 - // 2024-06-07T10:06:30.333-08:00 - // "# - // )] + #[case::timestamp_known_offset( + r#" + 2024-06-07T10:06+02:00 + 2024-06-07T10:06+01:00 + 2024-06-07T10:06-05:00 + 2024-06-07T10:06-08:00 + 2024-06-07T10:06:30+02:00 + 2024-06-07T10:06:30+01:00 + 2024-06-07T10:06:30-05:00 + 2024-06-07T10:06:30-08:00 + 2024-06-07T10:06:30.333+02:00 + 2024-06-07T10:06:30.333+01:00 + 2024-06-07T10:06:30.333-05:00 + 2024-06-07T10:06:30.333-08:00 + "# + )] #[case::annotated_timestamp( r#" foo::2024T @@ -2944,14 +2942,16 @@ mod tests { "# )] fn roundtripping(#[case] ion_data_1_0: &str) -> IonResult<()> { + // This test uses application-level readers and writers to do its roundtripping. This means + // that tests involving annotations, symbol values, or struct field names will produce a + // symbol table. 
let original_sequence = Element::read_all(ion_data_1_0)?; let mut writer = Writer::new(v1_1::Binary, Vec::new())?; writer.write_all(&original_sequence)?; let binary_data_1_1 = writer.close()?; let output_sequence = Element::read_all(binary_data_1_1)?; - assert_eq!( - original_sequence, - output_sequence, + assert!( + original_sequence.ion_eq(&output_sequence), "(original, after roundtrip)\n{}", original_sequence.iter().zip(output_sequence.iter()).fold( String::new(), From b8b9e1dabac089cb263b03f12946d504d70614e8 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 7 Jun 2024 16:57:47 -0400 Subject: [PATCH 04/15] cargo fmt --- src/lazy/binary/raw/v1_1/value.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 4380af15..82b75fdd 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -160,14 +160,17 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { fn annotations_sequence(&self) -> ImmutableBuffer<'top> { let sequence = self.input.slice( self.encoded_value.annotations_header_length as usize, - self.encoded_value.annotations_sequence_length as usize + self.encoded_value.annotations_sequence_length as usize, ); sequence } /// Returns an iterator over this value's unresolved annotation symbols. pub fn annotations(&self) -> RawBinaryAnnotationsIterator_1_1<'top> { - RawBinaryAnnotationsIterator_1_1::new(self.annotations_sequence(), self.encoded_value.annotations_encoding) + RawBinaryAnnotationsIterator_1_1::new( + self.annotations_sequence(), + self.encoded_value.annotations_encoding, + ) } /// Reads this value's data, returning it as a [`RawValueRef`]. 
If this value is a container, From bfa0f03a62f3feaa4391a8de7694f7c16afa51e0 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 7 Jun 2024 17:18:15 -0400 Subject: [PATCH 05/15] 1.1 binary writer now uses 1 for UTC and 0 for unknown --- src/lazy/binary/raw/v1_1/value.rs | 2 +- src/lazy/encoder/binary/v1_1/value_writer.rs | 64 ++++++++++---------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 82b75fdd..d6d9337c 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -336,7 +336,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { let length_code = self.encoded_value.header.length_code(); // An offset bit of `0` indicates UTC while a `1` indicates 'unknown' - let is_utc = (value_bytes[3] & 0x08) == 0; + let is_utc = (value_bytes[3] & 0x08) == 0x08; // Hour & Minute (populated from [`Self::read_timestamp_short`]), just need to know if UTC. if length_code == 3 { diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 0ef88f0a..38a34d5a 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -313,8 +313,8 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { // Compute the offset, its width in bits, and how that will affect the opcode and encoded length. let (num_offset_bits, offset_value, opcode_adjustment, length_adjustment) = match value.offset() { - None => (1, 1, 0, 0), // Unknown offset uses a single bit (1); opcode and length stay the same. - Some(0) => (1, 0, 0, 0), // UTC uses a single bit (0); opcode and length stay the same. + None => (1, 0, 0, 0), // Unknown offset uses a single bit (0); opcode and length stay the same. + Some(0) => (1, 1, 0, 0), // UTC uses a single bit (1); opcode and length stay the same. 
Some(offset_minutes) => { // Bump the opcode to the one the corresponds to the same precision/scale but with a known offset let opcode_adjustment = 5; @@ -1159,17 +1159,17 @@ mod tests { ( "2024-06-01T08:00Z", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b0000_1011, 0b0000_1000, 0b0000_0000], + &[0x83, 0b0011_0110, 0b0000_1011, 0b0000_1000, 0b0000_1000], ), ( "2024-06-15T12:30Z", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b0111_1011, 0b1100_1100, 0b0000_0011], + &[0x83, 0b0011_0110, 0b0111_1011, 0b1100_1100, 0b0000_1011], ), ( "2024-06-30T16:45Z", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b1111_0011, 0b1011_0000, 0b0000_0101], + &[0x83, 0b0011_0110, 0b1111_0011, 0b1011_0000, 0b0000_1101], ), // // === Hour & Minute @ Unknown Offset === @@ -1177,17 +1177,17 @@ mod tests { ( "2024-06-01T08:00-00:00", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b0000_1011, 0b0000_1000, 0b0000_1000], + &[0x83, 0b0011_0110, 0b0000_1011, 0b0000_1000, 0b0000_0000], ), ( "2024-06-15T12:30-00:00", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b0111_1011, 0b1100_1100, 0b0000_1011], + &[0x83, 0b0011_0110, 0b0111_1011, 0b1100_1100, 0b0000_0011], ), ( "2024-06-30T16:45-00:00", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b1111_0011, 0b1011_0000, 0b0000_1101], + &[0x83, 0b0011_0110, 0b1111_0011, 0b1011_0000, 0b0000_0101], ), // // === Second @ UTC === @@ -1199,7 +1199,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_0000, // ssss_Ummm + 0b0000_1000, // ssss_Ummm 0b0000_0000, // ...._..ss ], ), @@ -1210,7 +1210,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_0011, // ssss_Ummm + 0b1110_1011, // ssss_Ummm 0b0000_0001, // ...._..ss ], ), @@ -1221,7 +1221,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 
0b1101_0101, // ssss_Ummm + 0b1101_1101, // ssss_Ummm 0b0000_0010, // ...._..ss ], ), @@ -1235,7 +1235,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_1000, // ssss_Ummm + 0b0000_0000, // ssss_Ummm 0b0000_0000, // ...._..ss ], ), @@ -1246,7 +1246,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_1011, // ssss_Ummm + 0b1110_0011, // ssss_Ummm 0b0000_0001, // ...._..ss ], ), @@ -1257,7 +1257,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_1101, // ssss_Ummm + 0b1101_0101, // ssss_Ummm 0b0000_0010, // ...._..ss ], ), @@ -1271,7 +1271,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_0000, // ssss_Ummm + 0b0000_1000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1283,7 +1283,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_0011, // ssss_Ummm + 0b1110_1011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1295,7 +1295,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_0101, // ssss_Ummm + 0b1101_1101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1310,7 +1310,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_1000, // ssss_Ummm + 0b0000_0000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1322,7 +1322,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_1011, // ssss_Ummm + 0b1110_0011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1334,7 +1334,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_1101, // ssss_Ummm + 0b1101_0101, // ssss_Ummm 0b1011_0110, // ffff_ffss 
0b0000_0000, // ...._ffff ], @@ -1349,7 +1349,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_0000, // ssss_Ummm + 0b0000_1000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1362,7 +1362,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_0011, // ssss_Ummm + 0b1110_1011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1375,7 +1375,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_0101, // ssss_Ummm + 0b1101_1101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1391,7 +1391,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_1000, // ssss_Ummm + 0b0000_0000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1404,7 +1404,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_1011, // ssss_Ummm + 0b1110_0011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1417,7 +1417,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_1101, // ssss_Ummm + 0b1101_0101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1433,7 +1433,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_0000, // ssss_Ummm + 0b0000_1000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ -1447,7 +1447,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_0011, // ssss_Ummm + 0b1110_1011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ 
-1461,7 +1461,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_0101, // ssss_Ummm + 0b1101_1101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ -1478,7 +1478,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_1000, // ssss_Ummm + 0b0000_0000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ -1492,7 +1492,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_1011, // ssss_Ummm + 0b1110_0011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ -1506,7 +1506,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_1101, // ssss_Ummm + 0b1101_0101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff From b71b6e39eef82dd8d1163eacec96d5a56defd207 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 7 Jun 2024 17:22:29 -0400 Subject: [PATCH 06/15] fixed typo --- src/lazy/binary/raw/v1_1/value.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index d6d9337c..c5025d0f 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -335,7 +335,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { const MICROSECONDS_MASK_32BIT: u32 = 0x3F_FF_FC_00; let length_code = self.encoded_value.header.length_code(); - // An offset bit of `0` indicates UTC while a `1` indicates 'unknown' + // An offset bit of `1` indicates UTC while a `0` indicates 'unknown' let is_utc = (value_bytes[3] & 0x08) == 0x08; // Hour & Minute (populated from [`Self::read_timestamp_short`]), just need to know if UTC. 
From 9470b5b5dd18bb70f2eff40415f36fd54d0f6a43 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 7 Jun 2024 17:31:37 -0400 Subject: [PATCH 07/15] clippy suggestion --- src/lazy/binary/raw/v1_1/type_descriptor.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 90f982ca..ea96a834 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -104,10 +104,8 @@ impl Opcode { } pub fn is_annotations_sequence(&self) -> bool { - match self.opcode_type { - OpcodeType::AnnotationSymAddress | OpcodeType::AnnotationFlexSym => true, - _ => false, - } + use OpcodeType::*; + matches!(self.opcode_type, AnnotationSymAddress | AnnotationFlexSym) } #[inline] From 2cef8140c56f2e0042bdba0dbd51b9caf8b296b7 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sat, 8 Jun 2024 10:41:26 -0400 Subject: [PATCH 08/15] rename `length_code` to `low_nibble` --- src/lazy/binary/encoded_value.rs | 4 ++-- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 6 ++--- src/lazy/binary/raw/v1_1/type_descriptor.rs | 24 ++++++++++---------- src/lazy/binary/raw/v1_1/value.rs | 12 +++++----- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index fba9d50e..531cfef2 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -7,7 +7,7 @@ pub(crate) trait EncodedHeader: Copy { type TypeCode; fn ion_type(&self) -> IonType; fn type_code(&self) -> Self::TypeCode; - fn length_code(&self) -> u8; + fn low_nibble(&self) -> u8; fn is_null(&self) -> bool; } @@ -23,7 +23,7 @@ impl EncodedHeader for Header { self.ion_type_code } - fn length_code(&self) -> u8 { + fn low_nibble(&self) -> u8 { self.length_code } diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 1cfdab9a..94acddbc 100644 --- 
a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -185,10 +185,10 @@ impl<'a> ImmutableBuffer<'a> { let opcode = self.peek_opcode()?; // We need to determine the size of the nop.. - let (size, remaining) = if opcode.length_code == 0xC { + let (size, remaining) = if opcode.low_nibble() == 0xC { // Size 0; the nop is contained entirely within the OpCode. (0, self.consume(1)) - } else if opcode.length_code == 0xD { + } else if opcode.low_nibble() == 0xD { // We have a flexuint telling us how long our nop is. let after_header = self.consume(1); let (len, rest) = after_header.read_flex_uint()?; @@ -367,7 +367,7 @@ impl<'a> ImmutableBuffer<'a> { // the annotations and remember their encoding; later on, the annotations iterator // can actually do the reading. That optimization would be impactful for FlexSyms // that represent inline text. - let (sequence, remaining_input) = match opcode.length_code { + let (sequence, remaining_input) = match opcode.low_nibble() { 7 => { let (flex_sym, remaining_input) = input_after_opcode.read_flex_sym()?; let sequence = EncodedAnnotations { diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index ea96a834..5de9a84e 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -8,7 +8,7 @@ use crate::IonType; pub struct Opcode { pub opcode_type: OpcodeType, pub ion_type: Option, - pub length_code: u8, + pub low_nibble: u8, } /// A statically defined array of TypeDescriptor that allows a binary reader to map a given @@ -34,7 +34,7 @@ static ION_1_1_TIMESTAMP_SHORT_SIZE: [u8; 13] = [1, 2, 2, 4, 5, 6, 7, 8, 5, 5, 7 const DEFAULT_HEADER: Opcode = Opcode { opcode_type: OpcodeType::Nop, ion_type: None, - length_code: 0, + low_nibble: 0, }; pub(crate) const fn init_opcode_cache() -> [Opcode; 256] { @@ -87,7 +87,7 @@ impl Opcode { Opcode { ion_type, opcode_type, - length_code, + low_nibble: 
length_code, } } @@ -114,7 +114,7 @@ impl Opcode { let header = Header { ion_type, ion_type_code: self.opcode_type, - length_code: self.length_code, + low_nibble: self.low_nibble, }; Some(header) } @@ -136,27 +136,27 @@ pub struct Header { // The only time the `ion_type_code` is required is to distinguish between positive // and negative integers. pub ion_type_code: OpcodeType, - pub length_code: u8, + pub low_nibble: u8, } impl Header { pub fn length_type(&self) -> LengthType { use LengthType::*; - match (self.ion_type_code, self.length_code) { + match (self.ion_type_code, self.low_nibble) { (OpcodeType::Boolean, 0xE..=0xF) => InOpcode(0), (OpcodeType::Float, 0xA) => InOpcode(0), - (OpcodeType::Float, 0xB..=0xD) => InOpcode(1 << (self.length_code - 0xA)), + (OpcodeType::Float, 0xB..=0xD) => InOpcode(1 << (self.low_nibble - 0xA)), (OpcodeType::Integer, n) => InOpcode(n), (OpcodeType::Nop, 0xC) => InOpcode(0), (OpcodeType::NullNull, 0xA) => InOpcode(0), - (OpcodeType::String, 0..=15) => InOpcode(self.length_code), + (OpcodeType::String, 0..=15) => InOpcode(self.low_nibble), (OpcodeType::InlineSymbol, n) if n < 16 => InOpcode(n), (OpcodeType::SymbolAddress, n) if n < 4 => InOpcode(n), - (OpcodeType::Decimal, 0..=15) => InOpcode(self.length_code), + (OpcodeType::Decimal, 0..=15) => InOpcode(self.low_nibble), (OpcodeType::List, n) if n < 16 => InOpcode(n), (OpcodeType::SExpression, n) if n < 16 => InOpcode(n), (OpcodeType::TimestampShort, 0..=12) => { - InOpcode(ION_1_1_TIMESTAMP_SHORT_SIZE[self.length_code as usize]) + InOpcode(ION_1_1_TIMESTAMP_SHORT_SIZE[self.low_nibble as usize]) } (OpcodeType::TypedNull, _) => InOpcode(1), (OpcodeType::Struct, n) if n < 16 => InOpcode(n), @@ -176,8 +176,8 @@ impl EncodedHeader for Header { self.ion_type_code } - fn length_code(&self) -> u8 { - self.length_code + fn low_nibble(&self) -> u8 { + self.low_nibble } fn is_null(&self) -> bool { diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 
c5025d0f..93f9121c 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -237,7 +237,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { debug_assert!(self.encoded_value.ion_type() == IonType::Bool); let header = &self.encoded_value.header(); let representation = header.type_code(); - let value = match (representation, header.length_code) { + let value = match (representation, header.low_nibble) { (OpcodeType::Boolean, 0xE) => true, (OpcodeType::Boolean, 0xF) => false, _ => unreachable!("found a boolean value with an illegal length code."), @@ -251,7 +251,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { let header = &self.encoded_value.header(); let representation = header.type_code(); - let value = match (representation, header.length_code as usize) { + let value = match (representation, header.low_nibble as usize) { (OpcodeType::Integer, 0x0) => 0.into(), (OpcodeType::Integer, n) => { // We have n bytes following that make up our integer. @@ -334,7 +334,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { const MILLISECONDS_MASK_16BIT: u16 = 0x0F_FC; const MICROSECONDS_MASK_32BIT: u32 = 0x3F_FF_FC_00; - let length_code = self.encoded_value.header.length_code(); + let length_code = self.encoded_value.header.low_nibble(); // An offset bit of `1` indicates UTC while a `0` indicates 'unknown' let is_utc = (value_bytes[3] & 0x08) == 0x08; @@ -421,7 +421,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { const MICROSECOND_MASK_32BIT: u32 = 0x0F_FF_00; const NANOSECOND_MASK_32BIT: u32 = 0x3F_FF_FF_FF; - let length_code = self.encoded_value.header.length_code(); + let length_code = self.encoded_value.header.low_nibble(); // Read offset as 15min multiple let offset: u16 = u16::from_le_bytes(value_bytes[3..=4].try_into().unwrap()) @@ -481,7 +481,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { fn read_timestamp_short(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { const MONTH_MASK_16BIT: u16 = 0x07_80; - let length_code = 
self.encoded_value.header.length_code(); + let length_code = self.encoded_value.header.low_nibble(); let value_bytes = self.value_body()?; // Year is biased offset by 1970, and is held in the lower 7 bits of the first byte. @@ -636,7 +636,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Helper method called by [`Self::read_symbol`]. Reads the current value as a symbol ID. fn read_symbol_id(&self) -> IonResult { let biases: [usize; 3] = [0, 256, 65792]; - let length_code = self.encoded_value.header.length_code; + let length_code = self.encoded_value.header.low_nibble; if (1..=3).contains(&length_code) { let (id, _) = self.available_body().read_fixed_uint(length_code.into())?; let id = usize::try_from(id.value())?; From d564826851a49a797dcad8141b87a0d6f5ef74c2 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sat, 8 Jun 2024 10:45:58 -0400 Subject: [PATCH 09/15] adds `low_nibble` accessor --- src/lazy/binary/raw/v1_1/type_descriptor.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 5de9a84e..2a6c1f8d 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -108,6 +108,10 @@ impl Opcode { matches!(self.opcode_type, AnnotationSymAddress | AnnotationFlexSym) } + pub fn low_nibble(&self) -> u8 { + self.low_nibble + } + #[inline] pub fn to_header(self) -> Option
{ let ion_type = self.ion_type?; From 7b4f4a7455729a97e9fc2145d64963d568e0d963 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sun, 9 Jun 2024 12:50:02 -0400 Subject: [PATCH 10/15] Stubs out binary e-expr, adds BumpAllocator ref to binary buffer --- src/lazy/any_encoding.rs | 6 +- src/lazy/binary/encoded_value.rs | 2 +- src/lazy/binary/immutable_buffer.rs | 2 +- src/lazy/binary/raw/v1_1/e_expression.rs | 86 +++++ src/lazy/binary/raw/v1_1/immutable_buffer.rs | 67 +++- src/lazy/binary/raw/v1_1/mod.rs | 2 + src/lazy/binary/raw/v1_1/reader.rs | 374 ++++++++++++------- src/lazy/binary/raw/v1_1/sequence.rs | 21 +- src/lazy/binary/raw/v1_1/type_descriptor.rs | 11 + src/lazy/binary/raw/v1_1/value.rs | 9 +- src/lazy/encoding.rs | 5 +- src/lazy/text/raw/v1_1/reader.rs | 156 ++++---- 12 files changed, 486 insertions(+), 255 deletions(-) create mode 100644 src/lazy/binary/raw/v1_1/e_expression.rs diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 616ab49b..84ff9ad1 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -11,6 +11,7 @@ use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::raw::sequence::{ LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, RawBinarySequenceIterator_1_0, }; +use crate::lazy::binary::raw::v1_1::e_expression::RawBinaryEExpression_1_1; use crate::lazy::binary::raw::v1_1::r#struct::{ LazyRawBinaryFieldName_1_1, LazyRawBinaryStruct_1_1, RawBinaryStructIterator_1_1, }; @@ -33,7 +34,6 @@ use crate::lazy::encoding::{ BinaryEncoding_1_0, BinaryEncoding_1_1, TextEncoding_1_0, TextEncoding_1_1, }; use crate::lazy::expanded::macro_evaluator::RawEExpression; -use crate::lazy::never::Never; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::span::Span; @@ -174,7 +174,7 @@ pub struct LazyRawAnyEExpression<'top> { #[derive(Debug, Copy, Clone)] pub enum LazyRawAnyEExpressionKind<'top> { Text_1_1(RawTextEExpression_1_1<'top>), - 
Binary_1_1(Never), // TODO: RawBinaryEExpression_1_1 + Binary_1_1(RawBinaryEExpression_1_1<'top>), } impl<'top> LazyRawAnyEExpression<'top> { @@ -418,7 +418,7 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> { Text_1_0(r) => Ok(r.next(allocator)?.into()), Binary_1_0(r) => Ok(r.next()?.into()), Text_1_1(r) => Ok(r.next(allocator)?.into()), - Binary_1_1(r) => Ok(r.next()?.into()), + Binary_1_1(r) => Ok(r.next(allocator)?.into()), } } diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index 531cfef2..f4608ffd 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -76,7 +76,7 @@ pub(crate) struct EncodedValue { // value. If `annotations` is empty, `annotations_header_length` will be zero. The annotations // wrapper contains several fields: an opcode, a wrapper length, a sequence length, and the // sequence itself. - pub annotations_header_length: u8, + pub annotations_header_length: u16, // The number of bytes used to encode the series of symbol IDs inside the annotations wrapper. pub annotations_sequence_length: u16, // Whether the annotations sequence is encoded as `FlexSym`s or as symbol addresses. 
diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs index 7b1c4d6f..ad3e56c0 100644 --- a/src/lazy/binary/immutable_buffer.rs +++ b/src/lazy/binary/immutable_buffer.rs @@ -746,7 +746,7 @@ impl<'a> ImmutableBuffer<'a> { ); } - lazy_value.encoded_value.annotations_header_length = wrapper.header_length; + lazy_value.encoded_value.annotations_header_length = wrapper.header_length as u16; lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length as u16; lazy_value.encoded_value.total_length += wrapper.header_length as usize; // Modify the input to include the annotations diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs new file mode 100644 index 00000000..958a3e3b --- /dev/null +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -0,0 +1,86 @@ +#![allow(non_camel_case_types)] + +use std::fmt::{Debug, Formatter}; +use std::ops::Range; + +use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; +use crate::lazy::decoder::LazyRawValueExpr; +use crate::lazy::expanded::macro_evaluator::RawEExpression; +use crate::lazy::text::raw::v1_1::reader::{EncodedTextMacroInvocation, MacroIdRef}; +use crate::{v1_1, HasRange, HasSpan, IonResult, Span}; + +pub struct EncodedBinaryEExp { + // The number of bytes that were used to encode the e-expression's header (including its ID) + header_length: u16, +} + +impl EncodedBinaryEExp { + pub fn new(header_length: u16) -> Self { + Self { header_length } + } +} + +#[derive(Copy, Clone)] +pub struct RawBinaryEExpression_1_1<'top> { + pub(crate) encoded_expr: EncodedTextMacroInvocation, + pub(crate) input: ImmutableBuffer<'top>, + pub(crate) id: MacroIdRef<'top>, + pub(crate) arg_expr_cache: &'top [LazyRawValueExpr<'top, v1_1::Binary>], +} + +impl<'top> HasSpan<'top> for RawBinaryEExpression_1_1<'top> { + fn span(&self) -> Span<'top> { + Span::with_offset(self.input.offset(), self.input.bytes()) + } +} + +impl<'top> HasRange for 
RawBinaryEExpression_1_1<'top> { + fn range(&self) -> Range { + self.input.range() + } +} + +impl<'top> Debug for RawBinaryEExpression_1_1<'top> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "", self.id()) + } +} + +impl<'top> RawEExpression<'top, v1_1::Binary> for RawBinaryEExpression_1_1<'top> { + type RawArgumentsIterator<'a> = RawBinarySequenceCacheIterator_1_1<'top> + where + Self: 'a; + + fn id(&self) -> MacroIdRef<'top> { + self.id + } + + fn raw_arguments(&self) -> Self::RawArgumentsIterator<'top> { + RawBinarySequenceCacheIterator_1_1::new(self.arg_expr_cache) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct RawBinarySequenceCacheIterator_1_1<'top> { + child_exprs: &'top [LazyRawValueExpr<'top, v1_1::Binary>], + index: usize, +} + +impl<'top> RawBinarySequenceCacheIterator_1_1<'top> { + pub fn new(child_exprs: &'top [LazyRawValueExpr<'top, v1_1::Binary>]) -> Self { + Self { + child_exprs, + index: 0, + } + } +} + +impl<'top> Iterator for RawBinarySequenceCacheIterator_1_1<'top> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + let next_expr = self.child_exprs.get(self.index)?; + self.index += 1; + Some(Ok(*next_expr)) + } +} diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 94acddbc..78504af2 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -4,16 +4,20 @@ use crate::lazy::binary::raw::v1_1::value::{ LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1, }; use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, OpcodeType, ION_1_1_OPCODES}; +use crate::lazy::decoder::{LazyRawValueExpr, RawValueExpr}; use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt; use crate::lazy::encoder::binary::v1_1::fixed_uint::FixedUInt; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSym; use 
crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; +use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::result::IonFailure; -use crate::{IonError, IonResult}; +use crate::{v1_1, IonError, IonResult}; use std::fmt::{Debug, Formatter}; use std::ops::Range; +use bumpalo::Bump as BumpAllocator; + /// A buffer of unsigned bytes that can be cheaply copied and which defines methods for parsing /// the various encoding elements of a binary Ion stream. /// @@ -21,7 +25,7 @@ use std::ops::Range; /// and a copy of the `ImmutableBuffer` that starts _after_ the bytes that were parsed. /// /// Methods that `peek` at the input stream do not return a copy of the buffer. -#[derive(PartialEq, Clone, Copy)] +#[derive(Clone, Copy)] pub struct ImmutableBuffer<'a> { // `data` is a slice of remaining data in the larger input stream. // `offset` is the position in the overall input stream where that slice begins. @@ -32,6 +36,7 @@ pub struct ImmutableBuffer<'a> { // offset: 6 data: &'a [u8], offset: usize, + allocator: &'a BumpAllocator, } impl<'a> Debug for ImmutableBuffer<'a> { @@ -49,12 +54,20 @@ pub(crate) type ParseResult<'a, T> = IonResult<(T, ImmutableBuffer<'a>)>; impl<'a> ImmutableBuffer<'a> { /// Constructs a new `ImmutableBuffer` that wraps `data`. #[inline] - pub fn new(data: &[u8]) -> ImmutableBuffer { - Self::new_with_offset(data, 0) + pub fn new(allocator: &'a BumpAllocator, data: &'a [u8]) -> ImmutableBuffer<'a> { + Self::new_with_offset(allocator, data, 0) } - pub fn new_with_offset(data: &[u8], offset: usize) -> ImmutableBuffer { - ImmutableBuffer { data, offset } + pub fn new_with_offset( + allocator: &'a BumpAllocator, + data: &'a [u8], + offset: usize, + ) -> ImmutableBuffer<'a> { + ImmutableBuffer { + data, + offset, + allocator, + } } /// Returns a slice containing all of the buffer's bytes. 
@@ -75,6 +88,7 @@ impl<'a> ImmutableBuffer<'a> { ImmutableBuffer { data: self.bytes_range(offset, length), offset: self.offset + offset, + allocator: self.allocator, } } @@ -119,6 +133,7 @@ impl<'a> ImmutableBuffer<'a> { Self { data: &self.data[num_bytes_to_consume..], offset: self.offset + num_bytes_to_consume, + allocator: self.allocator, } } @@ -143,7 +158,7 @@ impl<'a> ImmutableBuffer<'a> { match bytes { [0xE0, major, minor, 0xEA] => { - let matched = ImmutableBuffer::new_with_offset(bytes, self.offset); + let matched = ImmutableBuffer::new_with_offset(self.allocator, bytes, self.offset); let marker = LazyRawBinaryVersionMarker_1_1::new(matched, *major, *minor); Ok((marker, self.consume(IVM.len()))) } @@ -249,7 +264,9 @@ impl<'a> ImmutableBuffer<'a> { /// Reads a value without a field name from the buffer. This is applicable in lists, s-expressions, /// and at the top level. - pub(crate) fn peek_sequence_value(self) -> IonResult>> { + pub(crate) fn peek_sequence_value_expr( + self, + ) -> IonResult>> { if self.is_empty() { return Ok(None); } @@ -266,7 +283,12 @@ impl<'a> ImmutableBuffer<'a> { // Otherwise, there's a value. type_descriptor = input.peek_opcode()?; } - Ok(Some(input.read_value(type_descriptor)?)) + if type_descriptor.is_e_expression() { + return self.read_e_expression(type_descriptor); + } + Ok(Some(RawValueExpr::ValueLiteral( + input.read_value(type_descriptor)?, + ))) } /// Reads a value from the buffer. 
The caller must confirm that the buffer is not empty and that @@ -339,11 +361,12 @@ impl<'a> ImmutableBuffer<'a> { let (annotations_seq, input_after_annotations) = self.read_annotations_sequence(opcode)?; let opcode = input_after_annotations.peek_opcode()?; let mut value = input_after_annotations.read_value_without_annotations(opcode)?; - value.encoded_value.annotations_header_length = annotations_seq.header_length; + let total_annotations_length = + annotations_seq.header_length as usize + annotations_seq.sequence_length as usize; + value.encoded_value.annotations_header_length = total_annotations_length as u16; value.encoded_value.annotations_sequence_length = annotations_seq.sequence_length; value.encoded_value.annotations_encoding = annotations_seq.encoding; - value.encoded_value.total_length += - annotations_seq.header_length as usize + annotations_seq.sequence_length as usize; + value.encoded_value.total_length += total_annotations_length; // Rewind the input to include the annotations sequence value.input = self; Ok(value) @@ -425,6 +448,17 @@ impl<'a> ImmutableBuffer<'a> { ) -> ParseResult<'a, EncodedAnnotations> { todo!() } + + fn read_e_expression( + self, + opcode: Opcode, + ) -> IonResult>> { + if opcode.opcode_type == OpcodeType::EExpressionWithAddress { + let _macro_id = MacroIdRef::LocalAddress(opcode.byte as usize); + // TODO: Add allocator reference to `ImmutableBuffer` so we can cache the arguments + } + todo!() + } } #[derive(Clone, Copy, Debug, PartialEq)] @@ -450,7 +484,8 @@ mod tests { use super::*; fn input_test>(input: A) { - let input = ImmutableBuffer::new(input.as_ref()); + let allocator = BumpAllocator::new(); + let input = ImmutableBuffer::new(&allocator, input.as_ref()); // We can peek at the first byte... assert_eq!(input.peek_next_byte(), Some(b'f')); // ...without modifying the input. Looking at the next 3 bytes still includes 'f'. 
@@ -485,12 +520,12 @@ mod tests { fn validate_nop_length() { // read_nop_pad reads a single NOP value, this test ensures that we're tracking the right // size for these values. - - let buffer = ImmutableBuffer::new(&[0xECu8]); + let allocator = BumpAllocator::new(); + let buffer = ImmutableBuffer::new(&allocator, &[0xECu8]); let (pad_size, _) = buffer.read_nop_pad().expect("unable to read NOP pad"); assert_eq!(pad_size, 1); - let buffer = ImmutableBuffer::new(&[0xEDu8, 0x05, 0x00, 0x00]); + let buffer = ImmutableBuffer::new(&allocator, &[0xEDu8, 0x05, 0x00, 0x00]); let (pad_size, _) = buffer.read_nop_pad().expect("unable to read NOP pad"); assert_eq!(pad_size, 4); } diff --git a/src/lazy/binary/raw/v1_1/mod.rs b/src/lazy/binary/raw/v1_1/mod.rs index a27fedd4..03247b90 100644 --- a/src/lazy/binary/raw/v1_1/mod.rs +++ b/src/lazy/binary/raw/v1_1/mod.rs @@ -7,5 +7,7 @@ pub mod r#struct; mod type_code; pub mod value; pub use type_code::*; +pub mod e_expression; pub mod type_descriptor; + pub use type_descriptor::*; diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 200a15d6..44961755 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -1,139 +1,106 @@ #![allow(non_camel_case_types)] use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; -use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; -use crate::lazy::decoder::{Decoder, LazyRawReader, RawVersionMarker}; +use crate::lazy::decoder::{Decoder, LazyRawReader, RawValueExpr, RawVersionMarker}; use crate::lazy::encoder::private::Sealed; use crate::lazy::encoding::BinaryEncoding_1_1; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; -use crate::{Encoding, IonResult}; +use crate::{Encoding, HasRange, IonResult}; use crate::lazy::any_encoding::IonEncoding; use bumpalo::Bump as BumpAllocator; pub struct LazyRawBinaryReader_1_1<'data> { - data: 
ImmutableBuffer<'data>, - bytes_to_skip: usize, // Bytes to skip in order to advance to the next item. + input: &'data [u8], + // The offset from the beginning of the overall stream at which the `input` slice begins + stream_offset: usize, + // The offset from the beginning of `input` at which the reader is positioned + local_offset: usize, } impl<'data> LazyRawBinaryReader_1_1<'data> { - fn new(data: &'data [u8]) -> Self { - Self::new_with_offset(data, 0) + fn new(input: &'data [u8]) -> Self { + Self::new_with_offset(input, 0) } - fn new_with_offset(data: &'data [u8], offset: usize) -> Self { - let data = ImmutableBuffer::new_with_offset(data, offset); + fn new_with_offset(input: &'data [u8], stream_offset: usize) -> Self { Self { - data, - bytes_to_skip: 0, + input, + stream_offset, + local_offset: 0, } } + fn end_of_stream(&self, position: usize) -> LazyRawStreamItem<'data, BinaryEncoding_1_1> { + RawStreamItem::EndOfStream(EndPosition::new(BinaryEncoding_1_1.encoding(), position)) + } + fn read_ivm<'top>( &mut self, - buffer: ImmutableBuffer<'data>, + buffer: ImmutableBuffer<'top>, ) -> IonResult> where 'data: 'top, { - let (marker, _buffer_after_ivm) = buffer.read_ivm()?; + let (marker, buffer_after_ivm) = buffer.read_ivm()?; let (major, minor) = marker.version(); if (major, minor) != (1, 1) { return IonResult::decoding_error(format!( "unsupported version of Ion: v{major}.{minor}; only 1.1 is supported by this reader", )); } - self.data = buffer; - self.bytes_to_skip = 4; + self.local_offset = buffer_after_ivm.offset() - self.stream_offset; Ok(LazyRawStreamItem::::VersionMarker( marker, )) } - fn read_value<'top>( - &mut self, - buffer: ImmutableBuffer<'data>, + fn read_value_expr<'top>( + &'top mut self, + buffer: ImmutableBuffer<'top>, ) -> IonResult> where 'data: 'top, { - let lazy_value = match ImmutableBuffer::peek_sequence_value(buffer)? 
{ - Some(lazy_value) => lazy_value, - None => { - return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(BinaryEncoding_1_1.encoding(), self.position()), - )) - } + let item = match buffer.peek_sequence_value_expr()? { + Some(RawValueExpr::ValueLiteral(lazy_value)) => RawStreamItem::Value(lazy_value), + Some(RawValueExpr::MacroInvocation(eexpr)) => RawStreamItem::EExpression(eexpr), + None => self.end_of_stream(buffer.offset()), }; - self.data = buffer; - self.bytes_to_skip = lazy_value.encoded_value.total_length(); - Ok(RawStreamItem::Value(lazy_value)) - } - - fn advance_to_next_item(&self) -> IonResult> { - if self.data.len() < self.bytes_to_skip { - return IonResult::incomplete( - "cannot advance to next item, insufficient data in buffer", - self.data.offset(), - ); - } - - if self.bytes_to_skip > 0 { - Ok(self.data.consume(self.bytes_to_skip)) - } else { - Ok(self.data) - } + let item_range = item.range(); + self.local_offset = item_range.end - self.stream_offset; + Ok(item) } - pub fn next<'top>(&'top mut self) -> IonResult> + pub fn next<'top>( + &'top mut self, + allocator: &'top BumpAllocator, + ) -> IonResult> where 'data: 'top, { - let mut buffer = self.advance_to_next_item()?; + let mut buffer = ImmutableBuffer::new_with_offset( + allocator, + self.input.get(self.local_offset..).unwrap(), + self.position(), + ); + if buffer.is_empty() { - return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(BinaryEncoding_1_1.encoding(), buffer.offset()), - )); + return Ok(self.end_of_stream(buffer.offset())); } let type_descriptor = buffer.peek_opcode()?; if type_descriptor.is_nop() { (_, buffer) = buffer.consume_nop_padding(type_descriptor)?; if buffer.is_empty() { - return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(BinaryEncoding_1_1.encoding(), buffer.offset()), - )); + return Ok(self.end_of_stream(buffer.offset())); } } if type_descriptor.is_ivm_start() { return self.read_ivm(buffer); } - self.read_value(buffer) - } - - /// Runs the 
provided parsing function on this reader's buffer. - /// If it succeeds, marks the reader as ready to advance by the 'n' bytes - /// that were consumed. - /// If it does not succeed, the `DataSource` remains unchanged. - pub(crate) fn try_parse_next< - F: Fn(ImmutableBuffer) -> IonResult>>, - >( - &mut self, - parser: F, - ) -> IonResult>> { - let buffer = self.advance_to_next_item()?; - - let lazy_value = match parser(buffer) { - Ok(Some(output)) => output, - Ok(None) => return Ok(None), - Err(e) => return Err(e), - }; - - // If the value we read doesn't start where we began reading, there was a NOP. - // let num_nop_bytes = lazy_value.input.offset() - buffer.offset(); - self.bytes_to_skip = lazy_value.encoded_value.total_length(); - Ok(Some(lazy_value)) + self.read_value_expr(buffer) } } @@ -144,16 +111,6 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 Self::new(data) } - fn next<'top>( - &'top mut self, - _allocator: &'top BumpAllocator, - ) -> IonResult> - where - 'data: 'top, - { - self.next() - } - fn resume_at_offset( data: &'data [u8], offset: usize, @@ -162,8 +119,18 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 Self::new_with_offset(data, offset) } + fn next<'top>( + &'top mut self, + allocator: &'top BumpAllocator, + ) -> IonResult> + where + 'data: 'top, + { + self.next(allocator) + } + fn position(&self) -> usize { - self.data.offset() + self.bytes_to_skip + self.stream_offset + self.local_offset } fn encoding(&self) -> IonEncoding { @@ -176,6 +143,7 @@ mod tests { use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; use crate::raw_symbol_ref::RawSymbolRef; use crate::{IonResult, IonType}; + use bumpalo::Bump as BumpAllocator; use rstest::*; #[test] @@ -189,11 +157,16 @@ mod tests { 0xEA, // null.null ]; + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = 
reader.next(&allocator)?.expect_ivm()?; assert_eq!( - reader.next()?.expect_value()?.read()?.expect_null()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_null()?, IonType::Null ); @@ -207,13 +180,23 @@ mod tests { 0x6E, // true 0x6F, // false ]; - + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(&allocator)?.expect_ivm()?; - assert!(reader.next()?.expect_value()?.read()?.expect_bool()?); + assert!(reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_bool()?); - assert!(!(reader.next()?.expect_value()?.read()?.expect_bool()?)); + assert!( + !(reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_bool()?) + ); Ok(()) } @@ -240,30 +223,50 @@ mod tests { // Integer: 147573952589676412929 0xF6, 0x13, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, ]; - + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(&allocator)?.expect_ivm()?; assert_eq!( - reader.next()?.expect_value()?.read()?.expect_int()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_int()?, 0.into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_int()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_int()?, 17.into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_int()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_int()?, (-944).into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_int()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_int()?, 1.into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_int()?, + reader + .next(&allocator)? + .expect_value()? + .read()? 
+ .expect_int()?, 147573952589676412929i128.into() ); Ok(()) @@ -290,24 +293,43 @@ mod tests { 0xF9, 0x31, 0x76, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x6C, 0x65, 0x6E, 0x67, 0x74, 0x68, 0x20, 0x65, 0x6E, 0x63, 0x6f, 0x64, 0x69, 0x6E, 0x67, ]; - + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(&allocator)?.expect_ivm()?; - assert_eq!(reader.next()?.expect_value()?.read()?.expect_string()?, ""); + assert_eq!( + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_string()?, + "" + ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_string()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_string()?, "hello" ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_string()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_string()?, "fourteen bytes" ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_string()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_string()?, "variable length encoding" ); @@ -341,37 +363,61 @@ mod tests { // Symbol ID: 65,793 0xE3, 0x01, 0x00, 0x00, ]; - + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(&allocator)?.expect_ivm()?; assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_symbol()?, "".into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_symbol()?, "fourteen bytes".into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(&allocator)? + .expect_value()? + .read()? 
+ .expect_symbol()?, "variable length encoding".into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_symbol()?, RawSymbolRef::SymbolId(1) ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_symbol()?, RawSymbolRef::SymbolId(257) ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_symbol()?, RawSymbolRef::SymbolId(65793) ); @@ -397,22 +443,37 @@ mod tests { // 3.141592653589793 (double-precision) 0x6D, 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, ]; - + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(&allocator)?.expect_ivm()?; - assert_eq!(reader.next()?.expect_value()?.read()?.expect_float()?, 0.0); + assert_eq!( + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_float()?, + 0.0 + ); // TODO: Implement Half-precision. - // assert_eq!(reader.next()?.expect_value()?.read()?.expect_float()?, 3.14); + // assert_eq!(reader.next(&allocator)?.expect_value()?.read()?.expect_float()?, 3.14); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_float()? as f32, + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_float()? as f32, 3.1415927f32, ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_float()?, + reader + .next(&allocator)? + .expect_value()? + .read()? 
+ .expect_float()?, std::f64::consts::PI, ); @@ -473,19 +534,19 @@ mod tests { fn decimals(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let bump = bumpalo::Bump::new(); + let allocator = BumpAllocator::new(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); assert_eq!( reader_bin - .next()? + .next(&allocator)? .expect_value()? .read()? .expect_decimal()?, reader_txt - .next(&bump)? + .next(&allocator)? .expect_value()? .read()? .expect_decimal()?, @@ -519,11 +580,12 @@ mod tests { use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; let bump = bumpalo::Bump::new(); + let allocator = BumpAllocator::new(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); let expected_value = reader_txt.next(&bump)?.expect_value()?.read()?; - let actual_value = reader_bin.next()?.expect_value()?.read()?; + let actual_value = reader_bin.next(&allocator)?.expect_value()?.read()?; assert!(actual_value .expect_decimal()? @@ -551,18 +613,18 @@ mod tests { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let bump = bumpalo::Bump::new(); + let allocator = BumpAllocator::new(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); assert_eq!( reader_bin - .next()? + .next(&allocator)? .expect_value()? .read()? .expect_timestamp()?, reader_txt - .next(&bump)? + .next(&allocator)? .expect_value()? .read()? 
.expect_timestamp()?, @@ -583,18 +645,18 @@ mod tests { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let bump = bumpalo::Bump::new(); + let allocator = BumpAllocator::new(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); assert_eq!( reader_bin - .next()? + .next(&allocator)? .expect_value()? .read()? .expect_timestamp()?, reader_txt - .next(&bump)? + .next(&allocator)? .expect_value()? .read()? .expect_timestamp()?, @@ -610,14 +672,22 @@ mod tests { 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(&allocator)?.expect_ivm()?; let bytes: &[u8] = &[ 0x49, 0x20, 0x61, 0x70, 0x70, 0x6c, 0x61, 0x75, 0x64, 0x20, 0x79, 0x6f, 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; - assert_eq!(reader.next()?.expect_value()?.read()?.expect_blob()?, bytes); + assert_eq!( + reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_blob()?, + bytes + ); Ok(()) } @@ -630,15 +700,23 @@ mod tests { 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(&allocator)?.expect_ivm()?; let bytes: &[u8] = &[ 0x49, 0x20, 0x61, 0x70, 0x70, 0x6c, 0x61, 0x75, 0x64, 0x20, 0x79, 0x6f, 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; - assert_eq!(reader.next()?.expect_value()?.read()?.expect_clob()?, bytes); + assert_eq!( + reader + .next(&allocator)? + .expect_value()? + .read()? 
+ .expect_clob()?, + bytes + ); Ok(()) } @@ -695,8 +773,13 @@ mod tests { ]; for (ion_data, expected_types) in tests { + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); - let container = reader.next()?.expect_value()?.read()?.expect_list()?; + let container = reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_list()?; let mut count = 0; for (actual_lazy_value, expected_type) in container.iter().zip(expected_types.iter()) { let value = actual_lazy_value?.expect_value()?; @@ -746,8 +829,13 @@ mod tests { ]; for (ion_data, expected_types) in tests { + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); - let container = reader.next()?.expect_value()?.read()?.expect_sexp()?; + let container = reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_sexp()?; let mut count = 0; for (actual_lazy_value, expected_type) in container.iter().zip(expected_types.iter()) { let value = actual_lazy_value?.expect_value()?; @@ -779,8 +867,13 @@ mod tests { ]; for (data, expected_type) in data { + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let actual_type = reader.next()?.expect_value()?.read()?.expect_null()?; + let actual_type = reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_null()?; assert_eq!(actual_type, expected_type); } Ok(()) @@ -890,8 +983,13 @@ mod tests { ]; for (ion_data, field_pairs) in tests { + let allocator = BumpAllocator::new(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); - let actual_data = reader.next()?.expect_value()?.read()?.expect_struct()?; + let actual_data = reader + .next(&allocator)? + .expect_value()? + .read()? 
+ .expect_struct()?; for (actual_field, expected_field) in actual_data.iter().zip(field_pairs.iter()) { let (expected_name, expected_value_type) = expected_field; diff --git a/src/lazy/binary/raw/v1_1/sequence.rs b/src/lazy/binary/raw/v1_1/sequence.rs index df4bead1..fc58f9b9 100644 --- a/src/lazy/binary/raw/v1_1/sequence.rs +++ b/src/lazy/binary/raw/v1_1/sequence.rs @@ -4,11 +4,9 @@ use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIt use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; use crate::lazy::decoder::private::LazyContainerPrivate; -use crate::lazy::decoder::{ - Decoder, LazyRawContainer, LazyRawSequence, LazyRawValueExpr, RawValueExpr, -}; +use crate::lazy::decoder::{Decoder, LazyRawContainer, LazyRawSequence, LazyRawValueExpr}; use crate::lazy::encoding::BinaryEncoding_1_1; -use crate::{IonResult, IonType}; +use crate::{HasRange, IonResult, IonType}; use std::fmt::{Debug, Formatter}; #[derive(Debug, Copy, Clone)] @@ -151,13 +149,12 @@ impl<'top> Iterator for RawBinarySequenceIterator_1_1<'top> { fn next(&mut self) -> Option { self.source = self.source.consume(self.bytes_to_skip); - match self.source.peek_sequence_value() { - Ok(Some(output)) => { - self.bytes_to_skip = output.encoded_value.total_length; - Some(Ok(RawValueExpr::ValueLiteral(output))) - } - Ok(None) => None, - Err(e) => Some(Err(e)), - } + let item = match self.source.peek_sequence_value_expr() { + Ok(Some(expr)) => expr, + Ok(None) => return None, + Err(e) => return Some(Err(e)), + }; + self.bytes_to_skip = item.range().len(); + Some(Ok(item)) } } diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 2a6c1f8d..56645510 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -9,6 +9,7 @@ pub struct Opcode { pub opcode_type: OpcodeType, pub ion_type: Option, pub 
low_nibble: u8, + pub byte: u8, } /// A statically defined array of TypeDescriptor that allows a binary reader to map a given @@ -35,6 +36,7 @@ const DEFAULT_HEADER: Opcode = Opcode { opcode_type: OpcodeType::Nop, ion_type: None, low_nibble: 0, + byte: 0, }; pub(crate) const fn init_opcode_cache() -> [Opcode; 256] { @@ -88,6 +90,7 @@ impl Opcode { ion_type, opcode_type, low_nibble: length_code, + byte, } } @@ -99,6 +102,14 @@ impl Opcode { self.opcode_type == OpcodeType::Nop } + pub fn is_e_expression(&self) -> bool { + use OpcodeType::*; + matches!( + self.opcode_type, + EExpressionWithAddress | EExpressionAddressFollows + ) + } + pub fn is_ivm_start(&self) -> bool { self.opcode_type == OpcodeType::IonVersionMarker } diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 93f9121c..66418e6c 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -158,10 +158,11 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Returns an `ImmutableBuffer` that contains the bytes comprising this value's encoded /// annotations sequence. 
fn annotations_sequence(&self) -> ImmutableBuffer<'top> { - let sequence = self.input.slice( - self.encoded_value.annotations_header_length as usize, - self.encoded_value.annotations_sequence_length as usize, - ); + let annotations_header_length = self.encoded_value.annotations_header_length as usize; + let sequence_length = self.encoded_value.annotations_sequence_length as usize; + let sequence = self + .input + .slice(annotations_header_length - sequence_length, sequence_length); sequence } diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index 7810463f..ef0451f9 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -34,6 +34,7 @@ use crate::lazy::text::value::{ LazyRawTextVersionMarker_1_1, RawTextAnnotationsIterator, }; +use crate::lazy::binary::raw::v1_1::e_expression::RawBinaryEExpression_1_1; use crate::{IonResult, TextFormat, WriteConfig}; /// Marker trait for types that represent an Ion encoding. @@ -242,11 +243,11 @@ impl Decoder for BinaryEncoding_1_1 { type Value<'top> = LazyRawBinaryValue_1_1<'top>; type SExp<'top> = LazyRawBinarySExp_1_1<'top>; type List<'top> = LazyRawBinaryList_1_1<'top>; - type FieldName<'top> = LazyRawBinaryFieldName_1_1<'top>; type Struct<'top> = LazyRawBinaryStruct_1_1<'top>; + type FieldName<'top> = LazyRawBinaryFieldName_1_1<'top>; type AnnotationsIterator<'top> = RawBinaryAnnotationsIterator_1_1<'top>; // TODO: implement macros in 1.1 - type EExp<'top> = Never; + type EExp<'top> = RawBinaryEExpression_1_1<'top>; type VersionMarker<'top> = LazyRawBinaryVersionMarker_1_1<'top>; } diff --git a/src/lazy/text/raw/v1_1/reader.rs b/src/lazy/text/raw/v1_1/reader.rs index f333247a..fdd70bf4 100644 --- a/src/lazy/text/raw/v1_1/reader.rs +++ b/src/lazy/text/raw/v1_1/reader.rs @@ -34,6 +34,84 @@ pub struct LazyRawTextReader_1_1<'data> { local_offset: usize, } +impl<'data> LazyRawReader<'data, TextEncoding_1_1> for LazyRawTextReader_1_1<'data> { + fn resume_at_offset( + data: &'data [u8], + offset: usize, + _config: 
::ReaderSavedState, + ) -> Self { + LazyRawTextReader_1_1 { + input: data, + // `data` begins at position `offset` within some larger stream. If `data` contains + // the entire stream, this will be zero. + stream_offset: offset, + // Start reading from the beginning of the slice `data` + local_offset: 0, + } + } + + fn next<'top>( + &'top mut self, + allocator: &'top BumpAllocator, + ) -> IonResult> + where + 'data: 'top, + { + let input = TextBufferView::new_with_offset( + allocator, + &self.input[self.local_offset..], + self.stream_offset + self.local_offset, + ); + let (buffer_after_whitespace, _whitespace) = input + .match_optional_comments_and_whitespace() + .with_context("reading v1.1 whitespace/comments at the top level", input)?; + if buffer_after_whitespace.is_empty() { + return Ok(RawStreamItem::EndOfStream(EndPosition::new( + TextEncoding_1_1.encoding(), + buffer_after_whitespace.offset(), + ))); + } + + // Consume any trailing whitespace that followed this item. Doing this allows us to check + // whether this was the last item in the buffer by testing `buffer.is_empty()` afterward. + let (buffer_after_item, matched_item) = buffer_after_whitespace + .match_top_level_item_1_1() + .with_context("reading a v1.1 top-level value", buffer_after_whitespace)?; + + let (buffer_after_trailing_ws, _trailing_ws) = buffer_after_item + .match_optional_comments_and_whitespace() + .with_context( + "reading trailing top-level whitespace/comments in v1.1", + buffer_after_item, + )?; + + if let RawStreamItem::VersionMarker(marker) = matched_item { + // TODO: It is not the raw reader's responsibility to report this error. It should + // surface the IVM to the caller, who can then either create a different reader + // for the reported version OR raise an error. 
+ // See: https://github.com/amazon-ion/ion-rust/issues/644 + let (major, minor) = marker.version(); + if (major, minor) != (1, 1) { + return IonResult::decoding_error(format!( + "Ion version {major}.{minor} is not supported" + )); + } + } + // Since we successfully matched the next value, we'll update the buffer + // so a future call to `next()` will resume parsing the remaining input. + self.local_offset = buffer_after_trailing_ws.offset() - self.stream_offset; + Ok(matched_item) + } + + fn position(&self) -> usize { + self.stream_offset + self.local_offset + } + + fn encoding(&self) -> IonEncoding { + IonEncoding::Text_1_1 + } +} + /// The index at which this macro can be found in the macro table. pub type MacroAddress = usize; @@ -135,84 +213,6 @@ impl EncodedTextMacroInvocation { } } -impl<'data> LazyRawReader<'data, TextEncoding_1_1> for LazyRawTextReader_1_1<'data> { - fn resume_at_offset( - data: &'data [u8], - offset: usize, - _config: ::ReaderSavedState, - ) -> Self { - LazyRawTextReader_1_1 { - input: data, - // `data` begins at position `offset` within some larger stream. If `data` contains - // the entire stream, this will be zero. - stream_offset: offset, - // Start reading from the beginning of the slice `data` - local_offset: 0, - } - } - - fn next<'top>( - &'top mut self, - allocator: &'top BumpAllocator, - ) -> IonResult> - where - 'data: 'top, - { - let input = TextBufferView::new_with_offset( - allocator, - &self.input[self.local_offset..], - self.stream_offset + self.local_offset, - ); - let (buffer_after_whitespace, _whitespace) = input - .match_optional_comments_and_whitespace() - .with_context("reading v1.1 whitespace/comments at the top level", input)?; - if buffer_after_whitespace.is_empty() { - return Ok(RawStreamItem::EndOfStream(EndPosition::new( - TextEncoding_1_1.encoding(), - buffer_after_whitespace.offset(), - ))); - } - - // Consume any trailing whitespace that followed this item. 
Doing this allows us to check - // whether this was the last item in the buffer by testing `buffer.is_empty()` afterward. - let (buffer_after_item, matched_item) = buffer_after_whitespace - .match_top_level_item_1_1() - .with_context("reading a v1.1 top-level value", buffer_after_whitespace)?; - - let (buffer_after_trailing_ws, _trailing_ws) = buffer_after_item - .match_optional_comments_and_whitespace() - .with_context( - "reading trailing top-level whitespace/comments in v1.1", - buffer_after_item, - )?; - - if let RawStreamItem::VersionMarker(marker) = matched_item { - // TODO: It is not the raw reader's responsibility to report this error. It should - // surface the IVM to the caller, who can then either create a different reader - // for the reported version OR raise an error. - // See: https://github.com/amazon-ion/ion-rust/issues/644 - let (major, minor) = marker.version(); - if (major, minor) != (1, 1) { - return IonResult::decoding_error(format!( - "Ion version {major}.{minor} is not supported" - )); - } - } - // Since we successfully matched the next value, we'll update the buffer - // so a future call to `next()` will resume parsing the remaining input. 
- self.local_offset = buffer_after_trailing_ws.offset() - self.stream_offset; - Ok(matched_item) - } - - fn position(&self) -> usize { - self.stream_offset + self.local_offset - } - - fn encoding(&self) -> IonEncoding { - IonEncoding::Text_1_1 - } -} - #[derive(Copy, Clone)] pub struct LazyRawTextList_1_1<'top> { pub(crate) value: LazyRawTextValue_1_1<'top>, From e1f2ba8ca69f03c707161baec9ba5b4cb72fb9a7 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Mon, 10 Jun 2024 09:01:17 -0400 Subject: [PATCH 11/15] plumb EncodingContextRef into buffer types --- src/lazy/any_encoding.rs | 46 +++--- src/lazy/binary/raw/reader.rs | 4 +- src/lazy/binary/raw/v1_1/e_expression.rs | 21 ++- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 111 +++++++++---- src/lazy/binary/raw/v1_1/reader.rs | 155 ++++++++----------- src/lazy/decoder.rs | 17 +- src/lazy/encoder/text/v1_1/writer.rs | 7 +- src/lazy/expanded/e_expression.rs | 2 +- src/lazy/expanded/mod.rs | 16 +- src/lazy/expanded/sequence.rs | 2 +- src/lazy/streaming_raw_reader.rs | 40 +++-- src/lazy/text/buffer.rs | 96 +++++++----- src/lazy/text/matched.rs | 53 +++---- src/lazy/text/raw/reader.rs | 31 ++-- src/lazy/text/raw/sequence.rs | 6 +- src/lazy/text/raw/struct.rs | 11 +- src/lazy/text/raw/v1_1/reader.rs | 26 ++-- src/lazy/text/value.rs | 11 +- src/lib.rs | 2 +- 19 files changed, 367 insertions(+), 290 deletions(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 84ff9ad1..039a1020 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -34,6 +34,7 @@ use crate::lazy::encoding::{ BinaryEncoding_1_0, BinaryEncoding_1_1, TextEncoding_1_0, TextEncoding_1_1, }; use crate::lazy::expanded::macro_evaluator::RawEExpression; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::span::Span; @@ -54,7 +55,6 @@ use crate::lazy::text::value::{ LazyRawTextVersionMarker_1_1, 
RawTextAnnotationsIterator, }; use crate::{Encoding, IonResult, IonType, RawSymbolRef}; -use bumpalo::Bump as BumpAllocator; /// An implementation of the `LazyDecoder` trait that can read any encoding of Ion. #[derive(Debug, Clone, Copy)] @@ -262,8 +262,8 @@ impl<'top> Iterator for LazyRawAnyMacroArgsIterator<'top> { Some(Ok(RawValueExpr::ValueLiteral(value))) => { Some(Ok(RawValueExpr::ValueLiteral(LazyRawAnyValue::from(value)))) } - Some(Ok(RawValueExpr::MacroInvocation(invocation))) => { - Some(Ok(RawValueExpr::MacroInvocation(LazyRawAnyEExpression { + Some(Ok(RawValueExpr::EExp(invocation))) => { + Some(Ok(RawValueExpr::EExp(LazyRawAnyEExpression { encoding: LazyRawAnyEExpressionKind::Text_1_1(invocation), }))) } @@ -408,17 +408,17 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> { fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, { use RawReaderKind::*; match &mut self.encoding { - Text_1_0(r) => Ok(r.next(allocator)?.into()), + Text_1_0(r) => Ok(r.next(context)?.into()), Binary_1_0(r) => Ok(r.next()?.into()), - Text_1_1(r) => Ok(r.next(allocator)?.into()), - Binary_1_1(r) => Ok(r.next(allocator)?.into()), + Text_1_1(r) => Ok(r.next(context)?.into()), + Binary_1_1(r) => Ok(r.next(context)?.into()), } } @@ -517,7 +517,7 @@ impl<'top> From> for LazyRawValueExpr<' fn from(value: LazyRawValueExpr<'top, TextEncoding_1_0>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), - RawValueExpr::MacroInvocation(_) => unreachable!("macro invocation in text Ion 1.0"), + RawValueExpr::EExp(_) => unreachable!("macro invocation in text Ion 1.0"), } } } @@ -528,7 +528,7 @@ impl<'top> From> fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_0>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), - RawValueExpr::MacroInvocation(_) => unreachable!("macro invocation in binary Ion 
1.0"), + RawValueExpr::EExp(_) => unreachable!("macro invocation in binary Ion 1.0"), } } } @@ -537,11 +537,11 @@ impl<'top> From> for LazyRawValueExpr<' fn from(value: LazyRawValueExpr<'top, TextEncoding_1_1>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), - RawValueExpr::MacroInvocation(m) => { + RawValueExpr::EExp(m) => { let invocation = LazyRawAnyEExpression { encoding: LazyRawAnyEExpressionKind::Text_1_1(m), }; - RawValueExpr::MacroInvocation(invocation) + RawValueExpr::EExp(invocation) } } } @@ -553,11 +553,11 @@ impl<'top> From> fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_1>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), - RawValueExpr::MacroInvocation(m) => { + RawValueExpr::EExp(m) => { let invocation = LazyRawAnyEExpression { encoding: LazyRawAnyEExpressionKind::Binary_1_1(m), }; - RawValueExpr::MacroInvocation(invocation) + RawValueExpr::EExp(invocation) } } } @@ -1473,41 +1473,41 @@ mod tests { #[test] fn any_encoding() -> IonResult<()> { fn test_input(data: &[u8]) -> IonResult<()> { - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawAnyReader::new(data); - assert_eq!(reader.next(&allocator)?.expect_ivm()?.version(), (1, 0)); + assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 0)); let _strukt = reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? 
.expect_struct()?; - let name = reader.next(&allocator)?.expect_value()?; + let name = reader.next(context)?.expect_value()?; assert_eq!( name.annotations().next().unwrap()?, RawSymbolRef::SymbolId(4) ); assert_eq!(name.read()?.expect_string()?.text(), "Gary"); assert_eq!( - reader.next(&allocator)?.expect_value()?.read()?, + reader.next(context)?.expect_value()?.read()?, RawValueRef::String("foo".into()) ); assert_eq!( - reader.next(&allocator)?.expect_value()?.read()?, + reader.next(context)?.expect_value()?.read()?, RawValueRef::Int(5.into()) ); assert_eq!( - reader.next(&allocator)?.expect_value()?.read()?, + reader.next(context)?.expect_value()?.read()?, RawValueRef::Timestamp(Timestamp::with_year(2023).with_month(8).build()?) ); assert_eq!( - reader.next(&allocator)?.expect_value()?.read()?, + reader.next(context)?.expect_value()?.read()?, RawValueRef::Bool(false) ); let mut sum = 0; for lazy_value_result in reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_list()? @@ -1521,7 +1521,7 @@ mod tests { // local symbol table and the raw reader interprets that as a different value. assert!(matches!( - reader.next(&allocator)?, + reader.next(context)?, LazyRawStreamItem::::EndOfStream(_) )); Ok(()) diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs index 341b15c2..5b9b5a5b 100644 --- a/src/lazy/binary/raw/reader.rs +++ b/src/lazy/binary/raw/reader.rs @@ -9,7 +9,7 @@ use crate::result::IonFailure; use crate::{Encoding, IonResult}; use crate::lazy::any_encoding::IonEncoding; -use bumpalo::Bump as BumpAllocator; +use crate::lazy::expanded::EncodingContextRef; /// A binary Ion 1.0 reader that yields [`LazyRawBinaryValue_1_0`]s representing the top level values found /// in the provided input stream. 
@@ -125,7 +125,7 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_0> for LazyRawBinaryReader_1_0 fn next<'top>( &'top mut self, - _allocator: &'top BumpAllocator, + _context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs index 958a3e3b..d768da16 100644 --- a/src/lazy/binary/raw/v1_1/e_expression.rs +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -6,9 +6,10 @@ use std::ops::Range; use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; use crate::lazy::decoder::LazyRawValueExpr; use crate::lazy::expanded::macro_evaluator::RawEExpression; -use crate::lazy::text::raw::v1_1::reader::{EncodedTextMacroInvocation, MacroIdRef}; +use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::{v1_1, HasRange, HasSpan, IonResult, Span}; +#[derive(Copy, Clone)] pub struct EncodedBinaryEExp { // The number of bytes that were used to encode the e-expression's header (including its ID) header_length: u16, @@ -22,12 +23,28 @@ impl EncodedBinaryEExp { #[derive(Copy, Clone)] pub struct RawBinaryEExpression_1_1<'top> { - pub(crate) encoded_expr: EncodedTextMacroInvocation, + pub(crate) encoded_expr: EncodedBinaryEExp, pub(crate) input: ImmutableBuffer<'top>, pub(crate) id: MacroIdRef<'top>, pub(crate) arg_expr_cache: &'top [LazyRawValueExpr<'top, v1_1::Binary>], } +impl<'top> RawBinaryEExpression_1_1<'top> { + pub fn new( + id: MacroIdRef<'top>, + encoded_expr: EncodedBinaryEExp, + input: ImmutableBuffer<'top>, + arg_expr_cache: &'top [LazyRawValueExpr<'top, v1_1::Binary>], + ) -> Self { + Self { + encoded_expr, + input, + id, + arg_expr_cache, + } + } +} + impl<'top> HasSpan<'top> for RawBinaryEExpression_1_1<'top> { fn span(&self) -> Span<'top> { Span::with_offset(self.input.offset(), self.input.bytes()) diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 78504af2..8bfa4d33 100644 --- 
a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -1,5 +1,11 @@ +use std::fmt::{Debug, Formatter}; +use std::ops::Range; + +use bumpalo::collections::Vec as BumpVec; + use crate::binary::constants::v1_1::IVM; use crate::lazy::binary::encoded_value::EncodedValue; +use crate::lazy::binary::raw::v1_1::e_expression::{EncodedBinaryEExp, RawBinaryEExpression_1_1}; use crate::lazy::binary::raw::v1_1::value::{ LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1, }; @@ -10,13 +16,11 @@ use crate::lazy::encoder::binary::v1_1::fixed_uint::FixedUInt; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSym; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; +use crate::lazy::expanded::macro_table::MacroKind; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::result::IonFailure; -use crate::{v1_1, IonError, IonResult}; -use std::fmt::{Debug, Formatter}; -use std::ops::Range; - -use bumpalo::Bump as BumpAllocator; +use crate::{v1_1, HasRange, IonError, IonResult}; /// A buffer of unsigned bytes that can be cheaply copied and which defines methods for parsing /// the various encoding elements of a binary Ion stream. @@ -36,7 +40,7 @@ pub struct ImmutableBuffer<'a> { // offset: 6 data: &'a [u8], offset: usize, - allocator: &'a BumpAllocator, + context: EncodingContextRef<'a>, } impl<'a> Debug for ImmutableBuffer<'a> { @@ -54,19 +58,19 @@ pub(crate) type ParseResult<'a, T> = IonResult<(T, ImmutableBuffer<'a>)>; impl<'a> ImmutableBuffer<'a> { /// Constructs a new `ImmutableBuffer` that wraps `data`. 
#[inline] - pub fn new(allocator: &'a BumpAllocator, data: &'a [u8]) -> ImmutableBuffer<'a> { - Self::new_with_offset(allocator, data, 0) + pub fn new(context: EncodingContextRef<'a>, data: &'a [u8]) -> ImmutableBuffer<'a> { + Self::new_with_offset(context, data, 0) } pub fn new_with_offset( - allocator: &'a BumpAllocator, + context: EncodingContextRef<'a>, data: &'a [u8], offset: usize, ) -> ImmutableBuffer<'a> { ImmutableBuffer { data, offset, - allocator, + context, } } @@ -88,7 +92,7 @@ impl<'a> ImmutableBuffer<'a> { ImmutableBuffer { data: self.bytes_range(offset, length), offset: self.offset + offset, - allocator: self.allocator, + context: self.context, } } @@ -133,7 +137,7 @@ impl<'a> ImmutableBuffer<'a> { Self { data: &self.data[num_bytes_to_consume..], offset: self.offset + num_bytes_to_consume, - allocator: self.allocator, + context: self.context, } } @@ -158,7 +162,7 @@ impl<'a> ImmutableBuffer<'a> { match bytes { [0xE0, major, minor, 0xEA] => { - let matched = ImmutableBuffer::new_with_offset(self.allocator, bytes, self.offset); + let matched = ImmutableBuffer::new_with_offset(self.context, bytes, self.offset); let marker = LazyRawBinaryVersionMarker_1_1::new(matched, *major, *minor); Ok((marker, self.consume(IVM.len()))) } @@ -284,7 +288,9 @@ impl<'a> ImmutableBuffer<'a> { type_descriptor = input.peek_opcode()?; } if type_descriptor.is_e_expression() { - return self.read_e_expression(type_descriptor); + return Ok(Some(RawValueExpr::EExp( + self.read_e_expression(type_descriptor)?, + ))); } Ok(Some(RawValueExpr::ValueLiteral( input.read_value(type_descriptor)?, @@ -449,15 +455,66 @@ impl<'a> ImmutableBuffer<'a> { todo!() } - fn read_e_expression( - self, - opcode: Opcode, - ) -> IonResult>> { - if opcode.opcode_type == OpcodeType::EExpressionWithAddress { - let _macro_id = MacroIdRef::LocalAddress(opcode.byte as usize); - // TODO: Add allocator reference to `ImmutableBuffer` so we can cache the arguments + fn read_e_expression(self, opcode: Opcode) -> 
IonResult> { + use OpcodeType::*; + let (macro_id, buffer_after_id) = match opcode.opcode_type { + EExpressionWithAddress => ( + MacroIdRef::LocalAddress(opcode.byte as usize), + self.consume(1), + ), + EExpressionAddressFollows => todo!("e-expr with trailing address"), + _ => unreachable!("read_e_expression called with invalid opcode"), + }; + + // TODO: When we support untagged parameter encodings, we need to use the signature's + // parameter encodings to drive this process. For now (while everything is tagged) + // and cardinality is always required, we just loop `n` times. + let macro_def = self + .context + .macro_table + .macro_with_id(macro_id) + .ok_or_else(|| { + IonError::decoding_error(format!("invocation of unknown macro '{macro_id:?}'")) + })?; + // TODO: The macro table should have a Signature on file for each of the system macros too. + // For now, we simply say how many arguments to expect. + use MacroKind::*; + let num_parameters = match macro_def.kind() { + Void => 0, + Values => 1, + MakeString => 1, + Template(t) => t.signature().parameters().len(), + }; + + let mut args_buffer = buffer_after_id; + let mut args_cache = self + .context + .allocator + .alloc_with(|| BumpVec::with_capacity_in(num_parameters, self.context.allocator)); + for _ in 0..num_parameters { + let value_expr = match buffer_after_id.peek_sequence_value_expr()? 
{ + Some(expr) => expr, + None => { + return IonResult::incomplete( + "found an incomplete e-expression", + buffer_after_id.offset(), + ) + } + }; + args_buffer = args_buffer.consume(value_expr.range().len()); + args_cache.push(value_expr); } - todo!() + let macro_id_encoded_length = buffer_after_id.offset() - self.offset(); + let args_length = args_buffer.offset() - buffer_after_id.offset(); + let e_expression_buffer = self.slice(0, macro_id_encoded_length + args_length); + + let e_expression = RawBinaryEExpression_1_1::new( + macro_id, + EncodedBinaryEExp::new(macro_id_encoded_length as u16), + e_expression_buffer, + args_cache, + ); + Ok(e_expression) } } @@ -484,8 +541,8 @@ mod tests { use super::*; fn input_test>(input: A) { - let allocator = BumpAllocator::new(); - let input = ImmutableBuffer::new(&allocator, input.as_ref()); + let context = EncodingContextRef::unit_test_context(); + let input = ImmutableBuffer::new(context, input.as_ref()); // We can peek at the first byte... assert_eq!(input.peek_next_byte(), Some(b'f')); // ...without modifying the input. Looking at the next 3 bytes still includes 'f'. @@ -520,12 +577,12 @@ mod tests { fn validate_nop_length() { // read_nop_pad reads a single NOP value, this test ensures that we're tracking the right // size for these values. 
- let allocator = BumpAllocator::new(); - let buffer = ImmutableBuffer::new(&allocator, &[0xECu8]); + let context = EncodingContextRef::unit_test_context(); + let buffer = ImmutableBuffer::new(context, &[0xECu8]); let (pad_size, _) = buffer.read_nop_pad().expect("unable to read NOP pad"); assert_eq!(pad_size, 1); - let buffer = ImmutableBuffer::new(&allocator, &[0xEDu8, 0x05, 0x00, 0x00]); + let buffer = ImmutableBuffer::new(context, &[0xEDu8, 0x05, 0x00, 0x00]); let (pad_size, _) = buffer.read_nop_pad().expect("unable to read NOP pad"); assert_eq!(pad_size, 4); } diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 44961755..64bd744e 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -9,7 +9,7 @@ use crate::result::IonFailure; use crate::{Encoding, HasRange, IonResult}; use crate::lazy::any_encoding::IonEncoding; -use bumpalo::Bump as BumpAllocator; +use crate::lazy::expanded::EncodingContextRef; pub struct LazyRawBinaryReader_1_1<'data> { input: &'data [u8], @@ -65,7 +65,7 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { { let item = match buffer.peek_sequence_value_expr()? 
{ Some(RawValueExpr::ValueLiteral(lazy_value)) => RawStreamItem::Value(lazy_value), - Some(RawValueExpr::MacroInvocation(eexpr)) => RawStreamItem::EExpression(eexpr), + Some(RawValueExpr::EExp(eexpr)) => RawStreamItem::EExpression(eexpr), None => self.end_of_stream(buffer.offset()), }; let item_range = item.range(); @@ -75,13 +75,13 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { pub fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, { let mut buffer = ImmutableBuffer::new_with_offset( - allocator, + context, self.input.get(self.local_offset..).unwrap(), self.position(), ); @@ -121,12 +121,12 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, { - self.next(allocator) + self.next(context) } fn position(&self) -> usize { @@ -141,9 +141,9 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 #[cfg(test)] mod tests { use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; + use crate::lazy::expanded::EncodingContextRef; use crate::raw_symbol_ref::RawSymbolRef; use crate::{IonResult, IonType}; - use bumpalo::Bump as BumpAllocator; use rstest::*; #[test] @@ -157,13 +157,13 @@ mod tests { 0xEA, // null.null ]; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next(&allocator)?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? 
.expect_null()?, @@ -180,19 +180,19 @@ mod tests { 0x6E, // true 0x6F, // false ]; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next(&allocator)?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; assert!(reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_bool()?); assert!( !(reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_bool()?) @@ -223,50 +223,30 @@ mod tests { // Integer: 147573952589676412929 0xF6, 0x13, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, ]; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next(&allocator)?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; assert_eq!( - reader - .next(&allocator)? - .expect_value()? - .read()? - .expect_int()?, + reader.next(context)?.expect_value()?.read()?.expect_int()?, 0.into() ); assert_eq!( - reader - .next(&allocator)? - .expect_value()? - .read()? - .expect_int()?, + reader.next(context)?.expect_value()?.read()?.expect_int()?, 17.into() ); assert_eq!( - reader - .next(&allocator)? - .expect_value()? - .read()? - .expect_int()?, + reader.next(context)?.expect_value()?.read()?.expect_int()?, (-944).into() ); assert_eq!( - reader - .next(&allocator)? - .expect_value()? - .read()? - .expect_int()?, + reader.next(context)?.expect_value()?.read()?.expect_int()?, 1.into() ); assert_eq!( - reader - .next(&allocator)? - .expect_value()? - .read()? 
- .expect_int()?, + reader.next(context)?.expect_value()?.read()?.expect_int()?, 147573952589676412929i128.into() ); Ok(()) @@ -293,13 +273,13 @@ mod tests { 0xF9, 0x31, 0x76, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x6C, 0x65, 0x6E, 0x67, 0x74, 0x68, 0x20, 0x65, 0x6E, 0x63, 0x6f, 0x64, 0x69, 0x6E, 0x67, ]; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next(&allocator)?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_string()?, @@ -308,7 +288,7 @@ mod tests { assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_string()?, @@ -317,7 +297,7 @@ mod tests { assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_string()?, @@ -326,7 +306,7 @@ mod tests { assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_string()?, @@ -363,13 +343,13 @@ mod tests { // Symbol ID: 65,793 0xE3, 0x01, 0x00, 0x00, ]; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next(&allocator)?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_symbol()?, @@ -378,7 +358,7 @@ mod tests { assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_symbol()?, @@ -387,7 +367,7 @@ mod tests { assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_symbol()?, @@ -396,7 +376,7 @@ mod tests { assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_symbol()?, @@ -405,7 +385,7 @@ mod tests { assert_eq!( reader - .next(&allocator)? 
+ .next(context)? .expect_value()? .read()? .expect_symbol()?, @@ -414,7 +394,7 @@ mod tests { assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_symbol()?, @@ -443,13 +423,13 @@ mod tests { // 3.141592653589793 (double-precision) 0x6D, 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, ]; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next(&allocator)?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_float()?, @@ -457,11 +437,11 @@ mod tests { ); // TODO: Implement Half-precision. - // assert_eq!(reader.next(&allocator)?.expect_value()?.read()?.expect_float()?, 3.14); + // assert_eq!(reader.next(context)?.expect_value()?.read()?.expect_float()?, 3.14); assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_float()? as f32, @@ -470,7 +450,7 @@ mod tests { assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_float()?, @@ -534,19 +514,19 @@ mod tests { fn decimals(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); assert_eq!( reader_bin - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_decimal()?, reader_txt - .next(&allocator)? + .next(context)? .expect_value()? .read()? 
.expect_decimal()?, @@ -578,14 +558,13 @@ mod tests { use crate::ion_data::IonEq; use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let bump = bumpalo::Bump::new(); - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); - let expected_value = reader_txt.next(&bump)?.expect_value()?.read()?; - let actual_value = reader_bin.next(&allocator)?.expect_value()?.read()?; + let expected_value = reader_txt.next(context)?.expect_value()?.read()?; + let actual_value = reader_bin.next(context)?.expect_value()?.read()?; assert!(actual_value .expect_decimal()? @@ -613,18 +592,18 @@ mod tests { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); assert_eq!( reader_bin - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_timestamp()?, reader_txt - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_timestamp()?, @@ -645,18 +624,18 @@ mod tests { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); assert_eq!( reader_bin - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_timestamp()?, reader_txt - .next(&allocator)? + .next(context)? .expect_value()? .read()? 
.expect_timestamp()?, @@ -672,9 +651,9 @@ mod tests { 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next(&allocator)?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; let bytes: &[u8] = &[ 0x49, 0x20, 0x61, 0x70, 0x70, 0x6c, 0x61, 0x75, 0x64, 0x20, 0x79, 0x6f, 0x75, 0x72, @@ -682,7 +661,7 @@ mod tests { ]; assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_blob()?, @@ -700,9 +679,9 @@ mod tests { 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next(&allocator)?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; let bytes: &[u8] = &[ 0x49, 0x20, 0x61, 0x70, 0x70, 0x6c, 0x61, 0x75, 0x64, 0x20, 0x79, 0x6f, 0x75, 0x72, @@ -711,7 +690,7 @@ mod tests { assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_clob()?, @@ -773,10 +752,10 @@ mod tests { ]; for (ion_data, expected_types) in tests { - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); let container = reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_list()?; @@ -829,10 +808,10 @@ mod tests { ]; for (ion_data, expected_types) in tests { - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); let container = reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? 
.expect_sexp()?; @@ -867,10 +846,10 @@ mod tests { ]; for (data, expected_type) in data { - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(&data); let actual_type = reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_null()?; @@ -983,10 +962,10 @@ mod tests { ]; for (ion_data, field_pairs) in tests { - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); let actual_data = reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_struct()?; diff --git a/src/lazy/decoder.rs b/src/lazy/decoder.rs index 8bddb85a..3658b332 100644 --- a/src/lazy/decoder.rs +++ b/src/lazy/decoder.rs @@ -1,11 +1,10 @@ use std::fmt::Debug; use std::ops::Range; -use bumpalo::Bump as BumpAllocator; - use crate::lazy::any_encoding::IonEncoding; use crate::lazy::encoding::{BinaryEncoding_1_0, RawValueLiteral, TextEncoding_1_0}; use crate::lazy::expanded::macro_evaluator::RawEExpression; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::span::Span; @@ -77,11 +76,11 @@ pub trait RawVersionMarker<'top>: Debug + Copy + Clone + HasSpan<'top> { /// When working with `RawValueExpr`s that always use a given decoder's `Value` and /// `MacroInvocation` associated types, consider using [`LazyRawValueExpr`] instead. #[derive(Copy, Clone, Debug, PartialEq)] -pub enum RawValueExpr { +pub enum RawValueExpr { /// A value literal. For example: `5`, `foo`, or `"hello"` in text. ValueLiteral(V), /// An Ion 1.1+ macro invocation. For example: `(:employee 12345 "Sarah" "Gonzalez")` in text. - MacroInvocation(M), + EExp(E), } // `RawValueExpr` above has no ties to a particular encoding. 
The `LazyRawValueExpr` type alias @@ -102,7 +101,7 @@ impl RawValueExpr { pub fn expect_value(self) -> IonResult { match self { RawValueExpr::ValueLiteral(v) => Ok(v), - RawValueExpr::MacroInvocation(_m) => IonResult::decoding_error( + RawValueExpr::EExp(_m) => IonResult::decoding_error( "expected a value literal, but found a macro invocation ({:?})", ), } @@ -114,7 +113,7 @@ impl RawValueExpr { "expected a macro invocation but found a value literal ({:?})", v )), - RawValueExpr::MacroInvocation(m) => Ok(m), + RawValueExpr::EExp(m) => Ok(m), } } } @@ -123,7 +122,7 @@ impl HasRange for RawValueExpr { fn range(&self) -> Range { match self { RawValueExpr::ValueLiteral(value) => value.range(), - RawValueExpr::MacroInvocation(eexp) => eexp.range(), + RawValueExpr::EExp(eexp) => eexp.range(), } } } @@ -132,7 +131,7 @@ impl<'top, V: HasSpan<'top>, M: HasSpan<'top>> HasSpan<'top> for RawValueExpr Span<'top> { match self { RawValueExpr::ValueLiteral(value) => value.span(), - RawValueExpr::MacroInvocation(eexp) => eexp.span(), + RawValueExpr::EExp(eexp) => eexp.span(), } } } @@ -352,7 +351,7 @@ pub trait LazyRawReader<'data, D: Decoder>: Sized { fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top; diff --git a/src/lazy/encoder/text/v1_1/writer.rs b/src/lazy/encoder/text/v1_1/writer.rs index 62777ec5..5772aebf 100644 --- a/src/lazy/encoder/text/v1_1/writer.rs +++ b/src/lazy/encoder/text/v1_1/writer.rs @@ -100,6 +100,7 @@ mod tests { use crate::lazy::encoder::write_as_ion::WriteAsSExp; use crate::lazy::encoder::LazyRawWriter; use crate::lazy::expanded::macro_evaluator::RawEExpression; + use crate::lazy::expanded::EncodingContextRef; use crate::lazy::text::raw::v1_1::reader::{LazyRawTextReader_1_1, MacroIdRef}; use crate::symbol_ref::AsSymbolRef; use crate::{ @@ -263,9 +264,9 @@ mod tests { println!("{encoded_text}"); let mut reader = LazyRawTextReader_1_1::new(encoded_text.as_bytes()); - let 
bump = bumpalo::Bump::new(); - let _marker = reader.next(&bump)?.expect_ivm()?; - let eexp = reader.next(&bump)?.expect_macro_invocation()?; + let context = EncodingContextRef::unit_test_context(); + let _marker = reader.next(context)?.expect_ivm()?; + let eexp = reader.next(context)?.expect_macro_invocation()?; assert_eq!(MacroIdRef::LocalName("foo"), eexp.id()); let mut args = eexp.raw_arguments(); let int_arg = args.next().unwrap()?.expect_value()?.read()?.expect_int()?; diff --git a/src/lazy/expanded/e_expression.rs b/src/lazy/expanded/e_expression.rs index 0bc4bdd2..cde06907 100644 --- a/src/lazy/expanded/e_expression.rs +++ b/src/lazy/expanded/e_expression.rs @@ -87,7 +87,7 @@ impl<'top, D: Decoder> Iterator for EExpressionArgsIterator<'top, D> { LazyRawValueExpr::::ValueLiteral(value) => { ValueExpr::ValueLiteral(LazyExpandedValue::from_literal(self.context, value)) } - LazyRawValueExpr::::MacroInvocation(raw_invocation) => { + LazyRawValueExpr::::EExp(raw_invocation) => { let invocation = match raw_invocation.resolve(self.context) { Ok(invocation) => invocation, Err(e) => return Some(Err(e)), diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index b6fa9eb9..4c13322c 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -124,6 +124,20 @@ impl<'top> EncodingContextRef<'top> { pub fn new(context: &'top EncodingContext<'top>) -> Self { Self { context } } + + #[cfg(test)] + pub fn unit_test_context() -> EncodingContextRef<'static> { + // For the sake of the unit tests, make a dummy encoding context with no lifetime + // constraints. 
+ let macro_table_ref: &'static MacroTable = Box::leak(Box::new(MacroTable::new())); + let symbol_table_ref: &'static SymbolTable = Box::leak(Box::new(SymbolTable::new())); + let allocator_ref: &'static BumpAllocator = Box::leak(Box::new(BumpAllocator::new())); + let empty_context: EncodingContext<'static> = + EncodingContext::new(macro_table_ref, symbol_table_ref, allocator_ref); + let context: EncodingContextRef<'static> = + EncodingContextRef::new(Box::leak(Box::new(empty_context))); + context + } } impl<'top> Deref for EncodingContextRef<'top> { @@ -408,7 +422,7 @@ impl ExpandingReader { // Pull another top-level expression from the input stream if one is available. use crate::lazy::raw_stream_item::RawStreamItem::*; let raw_reader = unsafe { &mut *self.raw_reader.get() }; - match raw_reader.next(allocator)? { + match raw_reader.next(context_ref)? { VersionMarker(marker) => return Ok(SystemStreamItem::VersionMarker(marker)), // We got our value; return it. Value(raw_value) => { diff --git a/src/lazy/expanded/sequence.rs b/src/lazy/expanded/sequence.rs index 9f297625..38b7f810 100644 --- a/src/lazy/expanded/sequence.rs +++ b/src/lazy/expanded/sequence.rs @@ -336,7 +336,7 @@ fn expand_next_sequence_value<'top, D: Decoder>( Some(Ok(RawValueExpr::ValueLiteral(value))) => { return Some(Ok(LazyExpandedValue::from_literal(context, value))) } - Some(Ok(RawValueExpr::MacroInvocation(invocation))) => { + Some(Ok(RawValueExpr::EExp(invocation))) => { let resolved_invocation = match invocation.resolve(context) { Ok(resolved) => resolved, Err(e) => return Some(Err(e)), diff --git a/src/lazy/streaming_raw_reader.rs b/src/lazy/streaming_raw_reader.rs index 1d9974be..f17e31d0 100644 --- a/src/lazy/streaming_raw_reader.rs +++ b/src/lazy/streaming_raw_reader.rs @@ -3,10 +3,9 @@ use std::fs::File; use std::io; use std::io::{BufReader, Read, StdinLock}; -use bumpalo::Bump as BumpAllocator; - use crate::lazy::any_encoding::IonEncoding; use crate::lazy::decoder::{Decoder, 
LazyRawReader}; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::{AnyEncoding, IonError, IonResult, LazyRawValue}; @@ -79,7 +78,7 @@ impl StreamingRawReader { pub fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> { let mut input_source_exhausted = false; loop { @@ -102,7 +101,7 @@ impl StreamingRawReader { )); let slice_reader = unsafe { &mut *unsafe_cell_reader.get() }; let starting_position = slice_reader.position(); - let result = slice_reader.next(allocator); + let result = slice_reader.next(context); // We're done modifying `slice_reader`, but we need to read some of its fields. These // fields are _not_ the data to which `result` holds a reference. We have to circumvent // the borrow checker's limitation (described in a comment on the StreamingRawReader type) @@ -431,10 +430,9 @@ mod tests { use std::io; use std::io::{BufReader, Cursor, Read}; - use bumpalo::Bump as BumpAllocator; - use crate::lazy::any_encoding::AnyEncoding; use crate::lazy::decoder::{Decoder, LazyRawValue}; + use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::streaming_raw_reader::{IonInput, StreamingRawReader}; @@ -466,23 +464,23 @@ mod tests { #[test] fn read_empty_slice() -> IonResult<()> { - let bump = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let ion = ""; let mut reader = StreamingRawReader::new(AnyEncoding, ion.as_bytes()); // We expect `Ok(EndOfStream)`, not `Err(Incomplete)`. 
- expect_end_of_stream(reader.next(&bump)?)?; + expect_end_of_stream(reader.next(context)?)?; Ok(()) } fn read_example_stream(input: impl IonInput) -> IonResult<()> { - let bump = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = StreamingRawReader::new(AnyEncoding, input); - expect_string(reader.next(&bump)?, "foo")?; - expect_string(reader.next(&bump)?, "bar")?; - expect_string(reader.next(&bump)?, "baz")?; - expect_string(reader.next(&bump)?, "quux")?; - expect_string(reader.next(&bump)?, "quuz")?; - expect_end_of_stream(reader.next(&bump)?) + expect_string(reader.next(context)?, "foo")?; + expect_string(reader.next(context)?, "bar")?; + expect_string(reader.next(context)?, "baz")?; + expect_string(reader.next(context)?, "quux")?; + expect_string(reader.next(context)?, "quuz")?; + expect_end_of_stream(reader.next(context)?) } // This stream is 104 bytes long @@ -522,9 +520,9 @@ mod tests { const INVALID_EXAMPLE_STREAM: &str = "2024-03-12T16:33.000-05:"; // Missing offset minutes fn read_invalid_example_stream(input: impl IonInput) -> IonResult<()> { - let bump = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = StreamingRawReader::new(AnyEncoding, input); - let result = reader.next(&bump); + let result = reader.next(context); // Because the input stream is exhausted, the incomplete value is illegal data and raises // a decoding error. assert!(matches!(result, Err(IonError::Decoding(_))), "{:?}", result); @@ -569,19 +567,19 @@ mod tests { } // This guarantees that there are several intermediate reading states in which the buffer // contains incomplete data that could be misinterpreted by a reader. 
- let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let mut reader = StreamingRawReader::new(v1_0::Text, IonStream::new(input)); - assert_eq!(reader.next(&allocator)?.expect_ivm()?.version(), (1, 0)); + assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 0)); assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_decimal()?, Decimal::new(87125, -2) ); - let value = reader.next(&allocator)?.expect_value()?; + let value = reader.next(context)?.expect_value()?; let annotations = value .annotations() .collect::>>()?; diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index ae5073cc..d3769417 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -23,6 +23,7 @@ use nom::{AsBytes, CompareResult, IResult, InputLength, InputTake, Needed, Parse use crate::lazy::decoder::{LazyRawFieldExpr, LazyRawValueExpr, RawValueExpr}; use crate::lazy::encoding::{TextEncoding, TextEncoding_1_0, TextEncoding_1_1}; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::lazy::text::encoded_value::EncodedTextValue; use crate::lazy::text::matched::{ @@ -104,7 +105,7 @@ pub struct TextBufferView<'top> { // offset: 6 data: &'top [u8], offset: usize, - pub(crate) allocator: &'top BumpAllocator, + pub(crate) context: EncodingContextRef<'top>, } impl<'a> PartialEq for TextBufferView<'a> { @@ -116,8 +117,8 @@ impl<'a> PartialEq for TextBufferView<'a> { impl<'top> TextBufferView<'top> { /// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to zero. 
#[inline] - pub fn new(allocator: &'top BumpAllocator, data: &'top [u8]) -> TextBufferView<'top> { - Self::new_with_offset(allocator, data, 0) + pub fn new(context: EncodingContextRef<'top>, data: &'top [u8]) -> TextBufferView<'top> { + Self::new_with_offset(context, data, 0) } /// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to the @@ -125,12 +126,12 @@ impl<'top> TextBufferView<'top> { /// Note that `offset` is the index of the larger stream at which `data` begins and not an /// offset _into_ `data`. pub fn new_with_offset( - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, data: &'top [u8], offset: usize, ) -> TextBufferView<'top> { TextBufferView { - allocator, + context, data, offset, } @@ -152,7 +153,7 @@ impl<'top> TextBufferView<'top> { TextBufferView { data: &self.data[offset..offset + length], offset: self.offset + offset, - allocator: self.allocator, + context: self.context, } } @@ -165,7 +166,7 @@ impl<'top> TextBufferView<'top> { TextBufferView { data: &self.data[offset..], offset: self.offset + offset, - allocator: self.allocator, + context: self.context, } } @@ -367,7 +368,7 @@ impl<'top> TextBufferView<'top> { self, ) -> IonParseResult<'top, Option>> { whitespace_and_then(alt(( - Self::match_e_expression.map(|matched| Some(RawValueExpr::MacroInvocation(matched))), + Self::match_e_expression.map(|matched| Some(RawValueExpr::EExp(matched))), value(None, tag(")")), pair( opt(Self::match_annotations), @@ -641,19 +642,19 @@ impl<'top> TextBufferView<'top> { map(Self::match_list, |_matched_list| { // TODO: Cache child expressions found in 1.0 list let not_yet_used_in_1_0 = - bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + bumpalo::collections::Vec::new_in(self.context.allocator).into_bump_slice(); EncodedTextValue::new(MatchedValue::List(not_yet_used_in_1_0)) }), map(Self::match_sexp, |_matched_sexp| { // TODO: Cache child expressions found in 1.0 sexp let not_yet_used_in_1_0 = 
- bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + bumpalo::collections::Vec::new_in(self.context.allocator).into_bump_slice(); EncodedTextValue::new(MatchedValue::SExp(not_yet_used_in_1_0)) }), map(Self::match_struct, |_matched_struct| { // TODO: Cache child expressions found in 1.0 struct let not_yet_used_in_1_0 = - bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + bumpalo::collections::Vec::new_in(self.context.allocator).into_bump_slice(); EncodedTextValue::new(MatchedValue::Struct(not_yet_used_in_1_0)) }), ))) @@ -777,7 +778,7 @@ impl<'top> TextBufferView<'top> { let list_body = self.slice_to_end(1); let sequence_iter = RawTextListIterator_1_1::new(list_body); let (span, child_exprs) = - match TextListSpanFinder_1_1::new(self.allocator, sequence_iter).find_span() { + match TextListSpanFinder_1_1::new(self.context.allocator, sequence_iter).find_span() { Ok((span, child_exprs)) => (span, child_exprs), // If the complete container isn't available, return an incomplete. Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), @@ -817,7 +818,7 @@ impl<'top> TextBufferView<'top> { let sexp_body = self.slice_to_end(1); let sexp_iter = RawTextSExpIterator_1_1::new(sexp_body); let (span, child_expr_cache) = - match TextSExpSpanFinder_1_1::new(self.allocator, sexp_iter).find_span(1) { + match TextSExpSpanFinder_1_1::new(self.context.allocator, sexp_iter).find_span(1) { Ok((span, child_expr_cache)) => (span, child_expr_cache), // If the complete container isn't available, return an incomplete. 
Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), @@ -871,7 +872,7 @@ impl<'top> TextBufferView<'top> { Self::match_e_expression, Self::match_delimiter_after_list_value, ) - .map(|matched| Some(RawValueExpr::MacroInvocation(matched))), + .map(|matched| Some(RawValueExpr::EExp(matched))), value(None, tag("]")), terminated( Self::match_annotated_value_1_1.map(Some), @@ -976,7 +977,7 @@ impl<'top> TextBufferView<'top> { let struct_body = self.slice_to_end(1); let struct_iter = RawTextStructIterator_1_1::new(struct_body); let (span, fields) = - match TextStructSpanFinder_1_1::new(self.allocator, struct_iter).find_span() { + match TextStructSpanFinder_1_1::new(self.context.allocator, struct_iter).find_span() { Ok((span, fields)) => (span, fields), // If the complete container isn't available, return an incomplete. Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), @@ -1017,26 +1018,27 @@ impl<'top> TextBufferView<'top> { // we tell the iterator how many bytes comprised the head of the expression: two bytes // for `(:` plus the length of the macro ID. let initial_bytes_skipped = 2 + macro_id_bytes.len(); - let (span, child_expr_cache) = match TextSExpSpanFinder_1_1::new(self.allocator, sexp_iter) - .find_span(initial_bytes_skipped) - { - Ok((span, child_expr_cache)) => (span, child_expr_cache), - // If the complete container isn't available, return an incomplete. - Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), - // If invalid syntax was encountered, return a failure to prevent nom from trying - // other parser kinds. 
- Err(e) => { - return { - let error = InvalidInputError::new(self) - .with_label(format!( - "matching an e-expression invoking macro {}", - macro_name - )) - .with_description(format!("{}", e)); - Err(nom::Err::Failure(IonParseError::Invalid(error))) + let (span, child_expr_cache) = + match TextSExpSpanFinder_1_1::new(self.context.allocator, sexp_iter) + .find_span(initial_bytes_skipped) + { + Ok((span, child_expr_cache)) => (span, child_expr_cache), + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. + Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label(format!( + "matching an e-expression invoking macro {}", + macro_name + )) + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) + } } - } - }; + }; // For the matched span, we use `self` again to include the opening `(:` let matched = self.slice(0, span.len()); let remaining = self.slice_to_end(span.len()); @@ -2014,9 +2016,9 @@ impl<'data> nom::InputTake for TextBufferView<'data> { fn take_split(&self, count: usize) -> (Self, Self) { let (before, after) = self.data.split_at(count); - let buffer_before = TextBufferView::new_with_offset(self.allocator, before, self.offset()); + let buffer_before = TextBufferView::new_with_offset(self.context, before, self.offset()); let buffer_after = - TextBufferView::new_with_offset(self.allocator, after, self.offset() + count); + TextBufferView::new_with_offset(self.context, after, self.offset() + count); // Nom's convention is to place the remaining portion of the buffer first, which leads to // a potentially surprising reversed tuple order. 
(buffer_after, buffer_before) @@ -2220,6 +2222,9 @@ where #[cfg(test)] mod tests { + use crate::lazy::expanded::macro_table::MacroTable; + use crate::lazy::expanded::EncodingContext; + use crate::SymbolTable; use rstest::rstest; use super::*; @@ -2227,16 +2232,25 @@ mod tests { /// Stores an input string that can be tested against a given parser. struct MatchTest { input: String, - allocator: BumpAllocator, + context: EncodingContextRef<'static>, } impl MatchTest { /// Takes an `input` string and appends a trailing value to it, guaranteeing that the /// contents of the input are considered a complete token. fn new(input: &str) -> Self { + // For the sake of the unit tests, make a dummy encoding context with no lifetime + // constraints. + let macro_table_ref: &'static MacroTable = Box::leak(Box::new(MacroTable::new())); + let symbol_table_ref: &'static SymbolTable = Box::leak(Box::new(SymbolTable::new())); + let allocator_ref: &'static BumpAllocator = Box::leak(Box::new(BumpAllocator::new())); + let empty_context: EncodingContext<'static> = + EncodingContext::new(macro_table_ref, symbol_table_ref, allocator_ref); + let context: EncodingContextRef<'static> = + EncodingContextRef::new(Box::leak(Box::new(empty_context))); MatchTest { input: input.to_string(), - allocator: BumpAllocator::new(), + context, } } @@ -2244,7 +2258,7 @@ mod tests { where P: Parser, O, IonParseError<'data>>, { - let buffer = TextBufferView::new(&self.allocator, self.input.as_bytes()); + let buffer = TextBufferView::new(self.context, self.input.as_bytes()); match_length(parser).parse(buffer) } @@ -2890,8 +2904,8 @@ mod tests { } fn test_match_text_until_unescaped_str() { - let allocator = BumpAllocator::new(); - let input = TextBufferView::new(&allocator, r" foo bar \''' baz''' quux ".as_bytes()); + let context = EncodingContextRef::unit_test_context(); + let input = TextBufferView::new(context, r" foo bar \''' baz''' quux ".as_bytes()); let (_remaining, (matched, contains_escapes)) = 
input.match_text_until_unescaped_str(r#"'''"#).unwrap(); assert_eq!(matched.as_text().unwrap(), " foo bar \\''' baz"); diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 0ade70d5..fa1a5839 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -131,7 +131,7 @@ impl<'top> MatchedFieldName<'top> { } pub fn read(&self) -> IonResult> { - self.syntax.read(self.input.allocator, self.input) + self.syntax.read(self.input.context.allocator, self.input) } pub fn range(&self) -> Range { @@ -1221,9 +1221,9 @@ impl MatchedClob { #[cfg(test)] mod tests { - use bumpalo::Bump as BumpAllocator; use crate::lazy::bytes_ref::BytesRef; + use crate::lazy::expanded::EncodingContextRef; use crate::lazy::text::buffer::TextBufferView; use crate::{Decimal, Int, IonResult, Timestamp}; @@ -1231,8 +1231,8 @@ mod tests { fn read_ints() -> IonResult<()> { fn expect_int(data: &str, expected: impl Into) { let expected: Int = expected.into(); - let allocator = BumpAllocator::new(); - let buffer = TextBufferView::new(&allocator, data.as_bytes()); + let context = EncodingContextRef::unit_test_context(); + let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_int().unwrap(); let actual = matched.read(buffer).unwrap(); assert_eq!( @@ -1265,8 +1265,8 @@ mod tests { fn read_timestamps() -> IonResult<()> { fn expect_timestamp(data: &str, expected: Timestamp) { let data = format!("{data} "); // Append a space - let allocator = BumpAllocator::new(); - let buffer = TextBufferView::new(&allocator, data.as_bytes()); + let context = EncodingContextRef::unit_test_context(); + let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_timestamp().unwrap(); let actual = matched.read(buffer).unwrap(); assert_eq!( @@ -1367,8 +1367,8 @@ mod tests { #[test] fn read_decimals() -> IonResult<()> { fn expect_decimal(data: &str, expected: Decimal) { - let allocator = BumpAllocator::new(); - let 
buffer = TextBufferView::new(&allocator, data.as_bytes()); + let context = EncodingContextRef::unit_test_context(); + let buffer = TextBufferView::new(context, data.as_bytes()); let result = buffer.match_decimal(); assert!( result.is_ok(), @@ -1422,10 +1422,10 @@ mod tests { fn read_blobs() -> IonResult<()> { fn expect_blob(data: &str, expected: &str) { let data = format!("{data} "); // Append a space - let allocator = BumpAllocator::new(); - let buffer = TextBufferView::new(&allocator, data.as_bytes()); + let context = EncodingContextRef::unit_test_context(); + let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_blob().unwrap(); - let actual = matched.read(&allocator, buffer).unwrap(); + let actual = matched.read(context.allocator, buffer).unwrap(); assert_eq!( actual, expected.as_ref(), @@ -1460,11 +1460,11 @@ mod tests { // stream so the parser knows that the long-form strings are complete. We then trim // our fabricated value off of the input before reading. 
let data = format!("{data}\n0"); - let allocator = BumpAllocator::new(); - let buffer = TextBufferView::new(&allocator, data.as_bytes()); + let context = EncodingContextRef::unit_test_context(); + let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_string().unwrap(); let matched_input = buffer.slice(0, buffer.len() - 2); - let actual = matched.read(&allocator, matched_input).unwrap(); + let actual = matched.read(context.allocator, matched_input).unwrap(); assert_eq!( actual, expected, "Actual didn't match expected for input '{}'.\n{:?}\n!=\n{:?}", @@ -1496,25 +1496,28 @@ mod tests { #[test] fn read_clobs() -> IonResult<()> { - fn read_clob<'a>(allocator: &'a BumpAllocator, data: &'a str) -> IonResult> { - let buffer = TextBufferView::new(allocator, data.as_bytes()); + fn read_clob<'a>( + context: EncodingContextRef<'a>, + data: &'a str, + ) -> IonResult> { + let buffer = TextBufferView::new(context, data.as_bytes()); // All `read_clob` usages should be accepted by the matcher, so we can `unwrap()` the // call to `match_clob()`. let (_remaining, matched) = buffer.match_clob().unwrap(); // The resulting buffer slice may be rejected during reading. - matched.read(allocator, buffer) + matched.read(context.allocator, buffer) } - fn expect_clob_error(allocator: &BumpAllocator, data: &str) { - let actual = read_clob(allocator, data); + fn expect_clob_error(context: EncodingContextRef, data: &str) { + let actual = read_clob(context, data); assert!( actual.is_err(), "Successfully read a clob from illegal input." 
); } - fn expect_clob(allocator: &BumpAllocator, data: &str, expected: &str) { - let result = read_clob(allocator, data); + fn expect_clob(context: EncodingContextRef, data: &str, expected: &str) { + let result = read_clob(context, data); assert!( result.is_ok(), "Unexpected read failure for input '{data}': {:?}", @@ -1561,10 +1564,9 @@ mod tests { ("{{\"foo\rbar\rbaz\"}}", "foo\rbar\rbaz"), ]; - let mut allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); for (input, expected) in tests { - expect_clob(&allocator, input, expected); - allocator.reset(); + expect_clob(context, input, expected); } let illegal_inputs = [ @@ -1585,8 +1587,7 @@ mod tests { ]; for input in illegal_inputs { - expect_clob_error(&allocator, input); - allocator.reset(); + expect_clob_error(context, input); } Ok(()) diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 2262ff49..658ce5d6 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -1,10 +1,9 @@ #![allow(non_camel_case_types)] -use bumpalo::Bump as BumpAllocator; - use crate::lazy::any_encoding::IonEncoding; use crate::lazy::decoder::{Decoder, LazyRawReader, RawVersionMarker}; use crate::lazy::encoding::TextEncoding_1_0; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::AddContext; @@ -44,13 +43,13 @@ impl<'data> LazyRawTextReader_1_0<'data> { pub fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, { let input = TextBufferView::new_with_offset( - allocator, + context, &self.input[self.local_offset..], self.stream_offset + self.local_offset, ); @@ -107,12 +106,12 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_0> for LazyRawTextReader_1_0<'da fn next<'top>( &'top mut self, - allocator: &'top 
BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, { - self.next(allocator) + self.next(context) } fn position(&self) -> usize { @@ -134,21 +133,21 @@ mod tests { use super::*; struct TestReader<'data> { - allocator: BumpAllocator, + context: EncodingContextRef<'data>, reader: LazyRawTextReader_1_0<'data>, } impl<'data> TestReader<'data> { fn next(&mut self) -> IonResult> { - self.reader.next(&self.allocator) + self.reader.next(self.context) } fn expect_next<'a>(&'a mut self, expected: RawValueRef<'a, TextEncoding_1_0>) where 'data: 'a, { - let TestReader { allocator, reader } = self; + let TestReader { context, reader } = self; let lazy_value = reader - .next(allocator) + .next(*context) .expect("advancing the reader failed") .expect_value() .expect("expected a value"); @@ -300,7 +299,7 @@ mod tests { let reader = &mut TestReader { reader: LazyRawTextReader_1_0::new(data.as_bytes()), - allocator: BumpAllocator::new(), + context: EncodingContextRef::unit_test_context(), }; assert_eq!(reader.next()?.expect_ivm()?.version(), (1, 0)); @@ -474,23 +473,23 @@ mod tests { #[test] fn ranges_and_spans() -> IonResult<()> { - let bump = bumpalo::Bump::new(); + let context = EncodingContextRef::unit_test_context(); let data = b"foo 2024T bar::38 [1, 2, 3]"; let mut reader = LazyRawTextReader_1_0::new(data); - let foo = reader.next(&bump)?.expect_value()?; + let foo = reader.next(context)?.expect_value()?; assert_eq!(foo.span(), b"foo"); assert_eq!(foo.range(), 0..3); - let timestamp = reader.next(&bump)?.expect_value()?; + let timestamp = reader.next(context)?.expect_value()?; assert_eq!(timestamp.span(), b"2024T"); assert_eq!(timestamp.range(), 4..9); - let annotated_int = reader.next(&bump)?.expect_value()?; + let annotated_int = reader.next(context)?.expect_value()?; assert_eq!(annotated_int.span(), b"bar::38"); assert_eq!(annotated_int.range(), 10..17); - let list_value = reader.next(&bump)?.expect_value()?; + let list_value = 
reader.next(context)?.expect_value()?; assert_eq!(list_value.span(), b"[1, 2, 3]"); assert_eq!(list_value.range(), 18..27); diff --git a/src/lazy/text/raw/sequence.rs b/src/lazy/text/raw/sequence.rs index 1187e77b..f6372a6d 100644 --- a/src/lazy/text/raw/sequence.rs +++ b/src/lazy/text/raw/sequence.rs @@ -304,12 +304,12 @@ mod tests { use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::IonResult; - use bumpalo::Bump as BumpAllocator; + use crate::lazy::expanded::EncodingContextRef; fn expect_sequence_range(ion_data: &str, expected: Range) -> IonResult<()> { - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let reader = &mut LazyRawTextReader_1_0::new(ion_data.as_bytes()); - let value = reader.next(&allocator)?.expect_value()?; + let value = reader.next(context)?.expect_value()?; let actual_range = value.data_range(); assert_eq!( actual_range, expected, diff --git a/src/lazy/text/raw/struct.rs b/src/lazy/text/raw/struct.rs index 8e9e29af..6ab97ea5 100644 --- a/src/lazy/text/raw/struct.rs +++ b/src/lazy/text/raw/struct.rs @@ -157,16 +157,15 @@ impl<'top> IntoIterator for LazyRawTextStruct_1_0<'top> { mod tests { use std::ops::Range; - use bumpalo::Bump as BumpAllocator; - use crate::lazy::decoder::{HasRange, HasSpan, LazyRawStruct, LazyRawValue}; + use crate::lazy::expanded::EncodingContextRef; use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::IonResult; fn expect_struct_range(ion_data: &str, expected: Range) -> IonResult<()> { - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let reader = &mut LazyRawTextReader_1_0::new(ion_data.as_bytes()); - let value = reader.next(&allocator)?.expect_value()?; + let value = reader.next(context)?.expect_value()?; let actual_range = value.data_range(); assert_eq!( actual_range, expected, @@ -231,10 +230,10 @@ mod tests { ), ]; for (input, field_name_ranges) in tests { - let bump = bumpalo::Bump::new(); 
+ let context = EncodingContextRef::unit_test_context(); let mut reader = LazyRawTextReader_1_0::new(input.as_bytes()); let struct_ = reader - .next(&bump)? + .next(context)? .expect_value()? .read()? .expect_struct()?; diff --git a/src/lazy/text/raw/v1_1/reader.rs b/src/lazy/text/raw/v1_1/reader.rs index fdd70bf4..de094d40 100644 --- a/src/lazy/text/raw/v1_1/reader.rs +++ b/src/lazy/text/raw/v1_1/reader.rs @@ -5,7 +5,6 @@ use std::fmt::{Debug, Display, Formatter}; use std::ops::Range; use bumpalo::collections::Vec as BumpVec; -use bumpalo::Bump as BumpAllocator; use nom::character::streaming::satisfy; use crate::lazy::any_encoding::IonEncoding; @@ -17,6 +16,7 @@ use crate::lazy::decoder::{ }; use crate::lazy::encoding::TextEncoding_1_1; use crate::lazy::expanded::macro_evaluator::RawEExpression; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::lazy::span::Span; use crate::lazy::text::buffer::TextBufferView; @@ -52,13 +52,13 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_1> for LazyRawTextReader_1_1<'da fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, { let input = TextBufferView::new_with_offset( - allocator, + context, &self.input[self.local_offset..], self.stream_offset + self.local_offset, ); @@ -746,12 +746,12 @@ mod tests { use super::*; fn expect_next<'top, 'data: 'top>( - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, reader: &'top mut LazyRawTextReader_1_1<'data>, expected: RawValueRef<'top, TextEncoding_1_1>, ) { let lazy_value = reader - .next(allocator) + .next(context) .expect("advancing the reader failed") .expect_value() .expect("expected a value"); @@ -775,18 +775,18 @@ mod tests { false "#; - let allocator = BumpAllocator::new(); + let context = EncodingContextRef::unit_test_context(); let reader = &mut 
LazyRawTextReader_1_1::new(data.as_bytes()); // $ion_1_1 - assert_eq!(reader.next(&allocator)?.expect_ivm()?.version(), (1, 1)); + assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 1)); // "foo" - expect_next(&allocator, reader, RawValueRef::String("foo".into())); + expect_next(context, reader, RawValueRef::String("foo".into())); // bar - expect_next(&allocator, reader, RawValueRef::Symbol("bar".into())); + expect_next(context, reader, RawValueRef::Symbol("bar".into())); // (baz null.string) let sexp = reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_sexp()?; @@ -801,10 +801,10 @@ mod tests { ); assert!(children.next().is_none()); // (:quux quuz) - let macro_invocation = reader.next(&allocator)?.expect_macro_invocation()?; + let macro_invocation = reader.next(context)?.expect_macro_invocation()?; assert_eq!(macro_invocation.id, MacroIdRef::LocalName("quux")); - expect_next(&allocator, reader, RawValueRef::Int(77.into())); - expect_next(&allocator, reader, RawValueRef::Bool(false)); + expect_next(context, reader, RawValueRef::Int(77.into())); + expect_next(context, reader, RawValueRef::Bool(false)); Ok(()) } } diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index 4835f42f..27e75f5b 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -197,7 +197,7 @@ impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for LazyRawTextValue<'to fn read(&self) -> IonResult> { // Get the value's matched input, skipping over any annotations let matched_input = self.input.slice_to_end(self.encoded_value.data_offset()); - let allocator = self.input.allocator; + let allocator = self.input.context.allocator; use crate::lazy::text::matched::MatchedValue::*; let value_ref = match self.encoded_value.matched() { @@ -258,7 +258,7 @@ impl<'top> Iterator for RawTextAnnotationsIterator<'top> { let matched_input = self .input .slice(span.start - self.input.offset(), span.len()); - let text = match 
symbol.read(self.input.allocator, matched_input) { + let text = match symbol.read(self.input.context.allocator, matched_input) { Ok(text) => text, Err(e) => { self.has_returned_error = true; @@ -272,8 +272,7 @@ impl<'top> Iterator for RawTextAnnotationsIterator<'top> { #[cfg(test)] mod tests { - use bumpalo::Bump as BumpAllocator; - + use crate::lazy::expanded::EncodingContextRef; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::value::RawTextAnnotationsIterator; use crate::{IonResult, RawSymbolRef}; @@ -281,8 +280,8 @@ mod tests { #[test] fn iterate_annotations() -> IonResult<()> { fn test(input: &str) -> IonResult<()> { - let allocator = BumpAllocator::new(); - let input = TextBufferView::new(&allocator, input.as_bytes()); + let context = EncodingContextRef::unit_test_context(); + let input = TextBufferView::new(context, input.as_bytes()); let mut iter = RawTextAnnotationsIterator::new(input); assert_eq!(iter.next().unwrap()?, RawSymbolRef::Text("foo")); assert_eq!(iter.next().unwrap()?, RawSymbolRef::Text("bar")); diff --git a/src/lib.rs b/src/lib.rs index 9fa8611f..0ecd9128 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -186,7 +186,7 @@ pub(crate) mod unsafe_helpers; #[cfg(feature = "experimental-ion-hash")] pub mod ion_hash; -mod lazy; +pub(crate) mod lazy; mod write_config; pub use crate::lazy::any_encoding::AnyEncoding; From d19abeb06957ec38370c1174b3892bfda14b349c Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Mon, 10 Jun 2024 09:47:31 -0400 Subject: [PATCH 12/15] `EncodingContext` now owns its fields --- src/lazy/any_encoding.rs | 4 +- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 27 ++-- src/lazy/binary/raw/v1_1/reader.rs | 58 ++++++--- src/lazy/encoder/text/v1_1/writer.rs | 5 +- src/lazy/expanded/compiler.rs | 24 ++-- src/lazy/expanded/macro_evaluator.rs | 10 +- src/lazy/expanded/macro_table.rs | 2 +- src/lazy/expanded/mod.rs | 127 +++++++++++-------- src/lazy/expanded/struct.rs | 4 +- src/lazy/expanded/template.rs | 6 +- 
src/lazy/sequence.rs | 4 +- src/lazy/streaming_raw_reader.rs | 14 +- src/lazy/struct.rs | 2 +- src/lazy/system_reader.rs | 4 +- src/lazy/text/buffer.rs | 95 +++++++------- src/lazy/text/matched.rs | 28 ++-- src/lazy/text/raw/reader.rs | 4 +- src/lazy/text/raw/sequence.rs | 7 +- src/lazy/text/raw/struct.rs | 8 +- src/lazy/text/raw/v1_1/reader.rs | 4 +- src/lazy/text/value.rs | 9 +- src/lazy/value.rs | 8 +- 22 files changed, 256 insertions(+), 198 deletions(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 039a1020..0ca2d483 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -1464,6 +1464,7 @@ mod tests { use crate::lazy::any_encoding::LazyRawAnyReader; use crate::lazy::binary::test_utilities::to_binary_ion; use crate::lazy::decoder::{LazyRawReader, LazyRawSequence, LazyRawValue}; + use crate::lazy::expanded::EncodingContext; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::{IonResult, RawSymbolRef, Timestamp}; @@ -1473,7 +1474,8 @@ mod tests { #[test] fn any_encoding() -> IonResult<()> { fn test_input(data: &[u8]) -> IonResult<()> { - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawAnyReader::new(data); assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 0)); diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 8bfa4d33..74583201 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -476,21 +476,18 @@ impl<'a> ImmutableBuffer<'a> { .ok_or_else(|| { IonError::decoding_error(format!("invocation of unknown macro '{macro_id:?}'")) })?; - // TODO: The macro table should have a Signature on file for each of the system macros too. - // For now, we simply say how many arguments to expect. 
use MacroKind::*; let num_parameters = match macro_def.kind() { - Void => 0, - Values => 1, - MakeString => 1, Template(t) => t.signature().parameters().len(), + // Many system macros like `values`, `make_string`, etc take a variadic number of args. + _ => todo!("system macros require support for argument group encoding"), }; let mut args_buffer = buffer_after_id; - let mut args_cache = self + let args_cache = self .context - .allocator - .alloc_with(|| BumpVec::with_capacity_in(num_parameters, self.context.allocator)); + .allocator() + .alloc_with(|| BumpVec::with_capacity_in(num_parameters, self.context.allocator())); for _ in 0..num_parameters { let value_expr = match buffer_after_id.peek_sequence_value_expr()? { Some(expr) => expr, @@ -539,9 +536,11 @@ pub struct EncodedAnnotations { #[cfg(test)] mod tests { use super::*; + use crate::lazy::expanded::EncodingContext; fn input_test>(input: A) { - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let input = ImmutableBuffer::new(context, input.as_ref()); // We can peek at the first byte... assert_eq!(input.peek_next_byte(), Some(b'f')); @@ -577,7 +576,8 @@ mod tests { fn validate_nop_length() { // read_nop_pad reads a single NOP value, this test ensures that we're tracking the right // size for these values. 
- let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let buffer = ImmutableBuffer::new(context, &[0xECu8]); let (pad_size, _) = buffer.read_nop_pad().expect("unable to read NOP pad"); assert_eq!(pad_size, 1); @@ -586,4 +586,11 @@ mod tests { let (pad_size, _) = buffer.read_nop_pad().expect("unable to read NOP pad"); assert_eq!(pad_size, 4); } + + #[test] + fn read_e_expressions() { + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); + // let eexp = + } } diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 64bd744e..d372158e 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -1,16 +1,15 @@ #![allow(non_camel_case_types)] +use crate::lazy::any_encoding::IonEncoding; use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; use crate::lazy::decoder::{Decoder, LazyRawReader, RawValueExpr, RawVersionMarker}; use crate::lazy::encoder::private::Sealed; use crate::lazy::encoding::BinaryEncoding_1_1; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; use crate::{Encoding, HasRange, IonResult}; -use crate::lazy::any_encoding::IonEncoding; -use crate::lazy::expanded::EncodingContextRef; - pub struct LazyRawBinaryReader_1_1<'data> { input: &'data [u8], // The offset from the beginning of the overall stream at which the `input` slice begins @@ -140,11 +139,12 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 #[cfg(test)] mod tests { + use rstest::*; + use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; - use crate::lazy::expanded::EncodingContextRef; + use crate::lazy::expanded::EncodingContext; use crate::raw_symbol_ref::RawSymbolRef; use crate::{IonResult, IonType}; - use rstest::*; #[test] fn 
nop() -> IonResult<()> { @@ -157,7 +157,8 @@ mod tests { 0xEA, // null.null ]; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); let _ivm = reader.next(context)?.expect_ivm()?; @@ -180,7 +181,8 @@ mod tests { 0x6E, // true 0x6F, // false ]; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); let _ivm = reader.next(context)?.expect_ivm()?; @@ -223,7 +225,8 @@ mod tests { // Integer: 147573952589676412929 0xF6, 0x13, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, ]; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); let _ivm = reader.next(context)?.expect_ivm()?; @@ -273,7 +276,8 @@ mod tests { 0xF9, 0x31, 0x76, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x6C, 0x65, 0x6E, 0x67, 0x74, 0x68, 0x20, 0x65, 0x6E, 0x63, 0x6f, 0x64, 0x69, 0x6E, 0x67, ]; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); let _ivm = reader.next(context)?.expect_ivm()?; @@ -343,7 +347,8 @@ mod tests { // Symbol ID: 65,793 0xE3, 0x01, 0x00, 0x00, ]; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); let _ivm = reader.next(context)?.expect_ivm()?; @@ -423,7 +428,8 @@ mod tests { // 3.141592653589793 (double-precision) 0x6D, 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, ]; - let context = EncodingContextRef::unit_test_context(); + let empty_context = 
EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); let _ivm = reader.next(context)?.expect_ivm()?; @@ -514,7 +520,8 @@ mod tests { fn decimals(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); @@ -559,7 +566,8 @@ mod tests { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); @@ -592,7 +600,8 @@ mod tests { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); @@ -624,7 +633,8 @@ mod tests { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); @@ 
-651,7 +661,8 @@ mod tests { 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); let _ivm = reader.next(context)?.expect_ivm()?; @@ -679,7 +690,8 @@ mod tests { 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); let _ivm = reader.next(context)?.expect_ivm()?; @@ -752,7 +764,8 @@ mod tests { ]; for (ion_data, expected_types) in tests { - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); let container = reader .next(context)? @@ -808,7 +821,8 @@ mod tests { ]; for (ion_data, expected_types) in tests { - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); let container = reader .next(context)? @@ -846,7 +860,8 @@ mod tests { ]; for (data, expected_type) in data { - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); let actual_type = reader .next(context)? @@ -962,7 +977,8 @@ mod tests { ]; for (ion_data, field_pairs) in tests { - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); let actual_data = reader .next(context)? 
diff --git a/src/lazy/encoder/text/v1_1/writer.rs b/src/lazy/encoder/text/v1_1/writer.rs index 5772aebf..94d24ebd 100644 --- a/src/lazy/encoder/text/v1_1/writer.rs +++ b/src/lazy/encoder/text/v1_1/writer.rs @@ -100,7 +100,7 @@ mod tests { use crate::lazy::encoder::write_as_ion::WriteAsSExp; use crate::lazy::encoder::LazyRawWriter; use crate::lazy::expanded::macro_evaluator::RawEExpression; - use crate::lazy::expanded::EncodingContextRef; + use crate::lazy::expanded::EncodingContext; use crate::lazy::text::raw::v1_1::reader::{LazyRawTextReader_1_1, MacroIdRef}; use crate::symbol_ref::AsSymbolRef; use crate::{ @@ -264,7 +264,8 @@ mod tests { println!("{encoded_text}"); let mut reader = LazyRawTextReader_1_1::new(encoded_text.as_bytes()); - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let _marker = reader.next(context)?.expect_ivm()?; let eexp = reader.next(context)?.expect_macro_invocation()?; assert_eq!(MacroIdRef::LocalName("foo"), eexp.id()); diff --git a/src/lazy/expanded/compiler.rs b/src/lazy/expanded/compiler.rs index dc31e9fa..14667204 100644 --- a/src/lazy/expanded/compiler.rs +++ b/src/lazy/expanded/compiler.rs @@ -506,15 +506,15 @@ impl TemplateCompiler { #[cfg(test)] mod tests { + use std::collections::HashMap; + use crate::lazy::expanded::compiler::TemplateCompiler; - use crate::lazy::expanded::macro_table::MacroTable; use crate::lazy::expanded::template::{ ExprRange, TemplateBodyMacroInvocation, TemplateBodyValueExpr, TemplateBodyVariableReference, TemplateMacro, TemplateValue, }; - use crate::lazy::expanded::EncodingContext; - use crate::{Int, IntoAnnotations, IonResult, Symbol, SymbolTable}; - use std::collections::HashMap; + use crate::lazy::expanded::{EncodingContext, EncodingContextRef}; + use crate::{Int, IntoAnnotations, IonResult, Symbol}; // This function only looks at the value portion of the TemplateElement. 
To compare annotations, // see the `expect_annotations` method. @@ -602,26 +602,18 @@ mod tests { } struct TestResources { - macro_table: MacroTable, - symbol_table: SymbolTable, - allocator: bumpalo::Bump, + context: EncodingContext, } impl TestResources { fn new() -> Self { Self { - macro_table: MacroTable::new(), - symbol_table: SymbolTable::new(), - allocator: bumpalo::Bump::new(), + context: EncodingContext::empty(), } } - fn context(&self) -> EncodingContext { - EncodingContext { - macro_table: &self.macro_table, - symbol_table: &self.symbol_table, - allocator: &self.allocator, - } + fn context(&self) -> EncodingContextRef { + self.context.get_ref() } } diff --git a/src/lazy/expanded/macro_evaluator.rs b/src/lazy/expanded/macro_evaluator.rs index 0ce3deed..acc7c596 100644 --- a/src/lazy/expanded/macro_evaluator.rs +++ b/src/lazy/expanded/macro_evaluator.rs @@ -53,7 +53,7 @@ pub trait RawEExpression<'top, D: Decoder = Self>>: /// If the ID cannot be found in the `EncodingContext`, returns `Err`. 
fn resolve(self, context: EncodingContextRef<'top>) -> IonResult> { let invoked_macro = context - .macro_table + .macro_table() .macro_with_id(self.id()) .ok_or_else(|| { IonError::decoding_error(format!("unrecognized macro ID {:?}", self.id())) @@ -269,8 +269,8 @@ pub struct MacroEvaluator<'top, D: Decoder> { impl<'top, D: Decoder> MacroEvaluator<'top, D> { pub fn new(context: EncodingContextRef<'top>, environment: Environment<'top, D>) -> Self { - let macro_stack = BumpVec::new_in(context.allocator); - let mut env_stack = BumpVec::new_in(context.allocator); + let macro_stack = BumpVec::new_in(context.allocator()); + let mut env_stack = BumpVec::new_in(context.allocator()); env_stack.push(environment); Self { macro_stack, @@ -574,7 +574,7 @@ impl<'top, D: Decoder> MakeStringExpansion<'top, D> { } // Create a bump-allocated buffer to hold our constructed string - let mut buffer = BumpString::new_in(context.allocator); + let mut buffer = BumpString::new_in(context.allocator()); // We need to eagerly evaluate all of the arguments to `make_string` to produce its next // (and only) value. However, because `&mut self` (the expansion state) lives in a stack @@ -602,7 +602,7 @@ impl<'top, D: Decoder> MakeStringExpansion<'top, D> { // Convert our BumpString<'bump> into a &'bump str that we can wrap in an `ExpandedValueRef` let constructed_text = buffer.into_bump_str(); let expanded_value_ref: &'top ExpandedValueRef<'top, D> = context - .allocator + .allocator() .alloc_with(|| ExpandedValueRef::String(StrRef::from(constructed_text))); static EMPTY_ANNOTATIONS: &[&str] = &[]; diff --git a/src/lazy/expanded/macro_table.rs b/src/lazy/expanded/macro_table.rs index c5eba3d1..37628f69 100644 --- a/src/lazy/expanded/macro_table.rs +++ b/src/lazy/expanded/macro_table.rs @@ -59,7 +59,7 @@ impl<'top> MacroRef<'top> { /// Allows callers to resolve a macro ID (that is: name or address) to a [`MacroKind`], confirming /// its validity and allowing evaluation to begin. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct MacroTable { macros_by_address: Vec, // Maps names to an address that can be used to query the Vec above. diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index 4c13322c..03ef4b00 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -90,18 +90,18 @@ pub mod template; // happens to be available in the buffer OR the set that leads up to the next encoding directive. // The value proposition of being able to lazily explore multiple top level values concurrently // would need to be proved out first. -#[derive(Copy, Clone, Debug)] -pub struct EncodingContext<'top> { - pub(crate) macro_table: &'top MacroTable, - pub(crate) symbol_table: &'top SymbolTable, - pub(crate) allocator: &'top BumpAllocator, +#[derive(Debug)] +pub struct EncodingContext { + pub(crate) macro_table: MacroTable, + pub(crate) symbol_table: SymbolTable, + pub(crate) allocator: BumpAllocator, } -impl<'top> EncodingContext<'top> { +impl EncodingContext { pub fn new( - macro_table: &'top MacroTable, - symbol_table: &'top SymbolTable, - allocator: &'top BumpAllocator, + macro_table: MacroTable, + symbol_table: SymbolTable, + allocator: BumpAllocator, ) -> Self { Self { macro_table, @@ -110,18 +110,22 @@ impl<'top> EncodingContext<'top> { } } - pub fn get_ref(&'top self) -> EncodingContextRef<'top> { + pub fn empty() -> Self { + Self::new(MacroTable::new(), SymbolTable::new(), BumpAllocator::new()) + } + + pub fn get_ref(&self) -> EncodingContextRef { EncodingContextRef { context: self } } } #[derive(Debug, Copy, Clone)] pub struct EncodingContextRef<'top> { - context: &'top EncodingContext<'top>, + context: &'top EncodingContext, } impl<'top> EncodingContextRef<'top> { - pub fn new(context: &'top EncodingContext<'top>) -> Self { + pub fn new(context: &'top EncodingContext) -> Self { Self { context } } @@ -129,19 +133,24 @@ impl<'top> EncodingContextRef<'top> { pub fn unit_test_context() -> EncodingContextRef<'static> { 
// For the sake of the unit tests, make a dummy encoding context with no lifetime // constraints. - let macro_table_ref: &'static MacroTable = Box::leak(Box::new(MacroTable::new())); - let symbol_table_ref: &'static SymbolTable = Box::leak(Box::new(SymbolTable::new())); - let allocator_ref: &'static BumpAllocator = Box::leak(Box::new(BumpAllocator::new())); - let empty_context: EncodingContext<'static> = - EncodingContext::new(macro_table_ref, symbol_table_ref, allocator_ref); - let context: EncodingContextRef<'static> = - EncodingContextRef::new(Box::leak(Box::new(empty_context))); - context + EncodingContextRef::new(Box::leak(Box::new(EncodingContext::empty()))) + } + + pub fn allocator(&self) -> &'top BumpAllocator { + &self.context.allocator + } + + pub fn symbol_table(&self) -> &'top SymbolTable { + &self.context.symbol_table + } + + pub fn macro_table(&self) -> &'top MacroTable { + &self.context.macro_table } } impl<'top> Deref for EncodingContextRef<'top> { - type Target = EncodingContext<'top>; + type Target = EncodingContext; fn deref(&self) -> &Self::Target { self.context @@ -229,12 +238,7 @@ pub struct ExpandingReader { // Holds information found in symbol tables and encoding directives (TODO) that can be applied // to the encoding context the next time the reader is between top-level expressions. pending_lst: UnsafeCell, - // A bump allocator that is cleared between top-level expressions. - allocator: UnsafeCell, - // TODO: Make the symbol and macro tables traits on `Encoding` such that they can be configured - // statically. Then 1.0 types can use `Never` for the macro table. 
- symbol_table: UnsafeCell, - macro_table: UnsafeCell, + encoding_context: UnsafeCell, catalog: Box, } @@ -246,10 +250,8 @@ impl ExpandingReader { Self { raw_reader: raw_reader.into(), evaluator_ptr: None.into(), - allocator: BumpAllocator::new().into(), + encoding_context: EncodingContext::empty().into(), pending_lst: PendingLst::new().into(), - symbol_table: SymbolTable::new().into(), - macro_table: MacroTable::new().into(), catalog, } } @@ -257,34 +259,54 @@ impl ExpandingReader { // TODO: This method is temporary. It will be removed when the ability to read 1.1 encoding // directives from the input stream is available. Until then, template creation is manual. pub fn register_template(&mut self, template_definition: &str) -> IonResult { - let context = self.context(); - let template_macro: TemplateMacro = - { TemplateCompiler::compile_from_text(context.get_ref(), template_definition)? }; + let template_macro: TemplateMacro = self.compile_template(template_definition)?; + self.add_macro(template_macro) + } + + fn compile_template(&self, template_definition: &str) -> IonResult { + TemplateCompiler::compile_from_text(self.context(), template_definition) + } - let macro_table = self.macro_table.get_mut(); + fn add_macro(&mut self, template_macro: TemplateMacro) -> IonResult { + let macro_table = &mut self.context_mut().macro_table; macro_table.add_macro(template_macro) } - pub fn context(&self) -> EncodingContext<'_> { + pub fn context(&self) -> EncodingContextRef<'_> { // SAFETY: The only time that the macro table, symbol table, and allocator can be modified // is in the body of the method `between_top_level_expressions`. As long as nothing holds // a reference to the `EncodingContext` we create here when that method is running, // this is safe. 
- unsafe { - EncodingContext::new( - &*self.macro_table.get(), - &*self.symbol_table.get(), - &*self.allocator.get(), - ) - } + unsafe { (*self.encoding_context.get()).get_ref() } + } + + pub fn context_mut(&mut self) -> &mut EncodingContext { + // SAFETY: If the caller has a `&mut` reference to `self`, it is the only mutable reference + // that can modify `self.encoding_context`. + unsafe { &mut *self.encoding_context.get() } + } + + // SAFETY: This method takes an immutable reference to `self` and then modifies the + // EncodingContext's bump allocator via `UnsafeCell`. This should only be called from + // `between_top_level_expressions`, and the caller must confirm that nothing else holds a + // reference to any structures within `EncodingContext`. + unsafe fn reset_bump_allocator(&self) { + let context: &mut EncodingContext = &mut *self.encoding_context.get(); + context.allocator.reset(); } - pub fn pending_symtab_changes(&self) -> &PendingLst { + pub fn pending_lst(&self) -> &PendingLst { // If the user is able to call this method, the PendingLst is not being modified and it's // safe to immutably reference. unsafe { &*self.pending_lst.get() } } + pub fn pending_lst_mut(&mut self) -> &mut PendingLst { + // SAFETY: If the caller has a `&mut` reference to `self`, it is the only mutable reference + // that can modify `self.pending_lst`. + unsafe { &mut *self.pending_lst.get() } + } + fn ptr_to_mut_ref<'a, T>(ptr: *mut ()) -> &'a mut T { let typed_ptr: *mut T = ptr.cast(); unsafe { &mut *typed_ptr } @@ -355,21 +377,24 @@ impl ExpandingReader { /// /// This is the reader's opportunity to make any pending changes to the encoding context. fn between_top_level_expressions(&self) { - // SAFETY: This is the only place where we modify the encoding context. Take care not to - // alias the allocator, symbol table, or macro table in this scope. - // We're going to clear the bump allocator, so drop our reference to the evaluator that // lives there.
self.evaluator_ptr.set(None); - // Clear the allocator. - let allocator: &mut BumpAllocator = unsafe { &mut *self.allocator.get() }; - allocator.reset(); + // Clear the bump allocator. + // SAFETY: This is the only place where we modify the encoding context. Take care not to + // alias the allocator, symbol table, or macro table inside this `unsafe` scope. + unsafe { self.reset_bump_allocator() }; // If the pending LST has changes to apply, do so. + // SAFETY: Nothing else holds a reference to the `PendingLst`'s contents, so we can use the + // `UnsafeCell` to get a mutable reference to it. let pending_lst: &mut PendingLst = unsafe { &mut *self.pending_lst.get() }; if pending_lst.has_changes { - let symbol_table: &mut SymbolTable = unsafe { &mut *self.symbol_table.get() }; + // SAFETY: Nothing else holds a reference to the `EncodingContext`'s contents, so we can use the + // `UnsafeCell` to get a mutable reference to its symbol table. + let symbol_table: &mut SymbolTable = + &mut unsafe { &mut *self.encoding_context.get() }.symbol_table; Self::apply_pending_lst(pending_lst, symbol_table); } } @@ -416,7 +441,7 @@ impl ExpandingReader { // to find an expression that yields no values (for example: `(:void)`), so we perform this // step in a loop until we get a value or end-of-stream. - let allocator: &BumpAllocator = unsafe { &*self.allocator.get() }; + let allocator: &BumpAllocator = self.context().allocator(); let context_ref = EncodingContextRef::new(allocator.alloc_with(|| self.context())); loop { // Pull another top-level expression from the input stream if one is available. diff --git a/src/lazy/expanded/struct.rs b/src/lazy/expanded/struct.rs index 013f1dc1..ab09c810 100644 --- a/src/lazy/expanded/struct.rs +++ b/src/lazy/expanded/struct.rs @@ -100,7 +100,7 @@ impl<'top, D: Decoder> LazyExpandedFieldName<'top, D> { LazyExpandedFieldName::RawName(context, name) => match name.read()? 
{ RawSymbolRef::Text(text) => Ok(text.into()), RawSymbolRef::SymbolId(sid) => context - .symbol_table + .symbol_table() .symbol_for(sid) .map(AsSymbolRef::as_symbol_ref) .ok_or_else(|| { @@ -237,7 +237,7 @@ impl<'top, D: Decoder> LazyExpandedStruct<'top, D> { } pub fn bump_iter(&self) -> &'top mut ExpandedStructIterator<'top, D> { - self.context.allocator.alloc_with(|| self.iter()) + self.context.allocator().alloc_with(|| self.iter()) } pub fn find(&self, name: &str) -> IonResult>> { diff --git a/src/lazy/expanded/template.rs b/src/lazy/expanded/template.rs index 0e24cc7a..68d51373 100644 --- a/src/lazy/expanded/template.rs +++ b/src/lazy/expanded/template.rs @@ -203,7 +203,7 @@ impl<'top, D: Decoder> Iterator for TemplateSequenceIterator<'top, D> { // to the top of the loop. let invoked_macro = self .context - .macro_table + .macro_table() .macro_at_address(body_invocation.invoked_macro_address) .unwrap(); let invocation = TemplateMacroInvocation::new( @@ -317,7 +317,7 @@ impl<'top, D: Decoder> Iterator for TemplateStructUnexpandedFieldsIterator<'top, TemplateBodyValueExpr::MacroInvocation(body_invocation) => { let invoked_macro = self .context - .macro_table + .macro_table() .macro_at_address(body_invocation.invoked_macro_address) .unwrap(); let invocation = TemplateMacroInvocation::new( @@ -636,7 +636,7 @@ impl TemplateBodyMacroInvocation { context: EncodingContextRef<'top>, ) -> TemplateMacroInvocation<'top> { let invoked_macro = context - .macro_table + .macro_table() .macro_at_address(self.invoked_macro_address) .unwrap(); diff --git a/src/lazy/sequence.rs b/src/lazy/sequence.rs index b00a7be5..3943309c 100644 --- a/src/lazy/sequence.rs +++ b/src/lazy/sequence.rs @@ -110,7 +110,7 @@ impl<'top, D: Decoder> LazyList<'top, D> { pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_list.annotations(), - symbol_table: self.expanded_list.context.symbol_table, + symbol_table: 
self.expanded_list.context.symbol_table(), } } } @@ -259,7 +259,7 @@ impl<'top, D: Decoder> LazySExp<'top, D> { pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_sexp.annotations(), - symbol_table: self.expanded_sexp.context.symbol_table, + symbol_table: self.expanded_sexp.context.symbol_table(), } } } diff --git a/src/lazy/streaming_raw_reader.rs b/src/lazy/streaming_raw_reader.rs index f17e31d0..e9a4a41a 100644 --- a/src/lazy/streaming_raw_reader.rs +++ b/src/lazy/streaming_raw_reader.rs @@ -432,7 +432,7 @@ mod tests { use crate::lazy::any_encoding::AnyEncoding; use crate::lazy::decoder::{Decoder, LazyRawValue}; - use crate::lazy::expanded::EncodingContextRef; + use crate::lazy::expanded::EncodingContext; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::streaming_raw_reader::{IonInput, StreamingRawReader}; @@ -464,7 +464,8 @@ mod tests { #[test] fn read_empty_slice() -> IonResult<()> { - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let ion = ""; let mut reader = StreamingRawReader::new(AnyEncoding, ion.as_bytes()); // We expect `Ok(EndOfStream)`, not `Err(Incomplete)`. 
@@ -473,7 +474,8 @@ mod tests { } fn read_example_stream(input: impl IonInput) -> IonResult<()> { - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = StreamingRawReader::new(AnyEncoding, input); expect_string(reader.next(context)?, "foo")?; expect_string(reader.next(context)?, "bar")?; @@ -520,7 +522,8 @@ mod tests { const INVALID_EXAMPLE_STREAM: &str = "2024-03-12T16:33.000-05:"; // Missing offset minutes fn read_invalid_example_stream(input: impl IonInput) -> IonResult<()> { - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = StreamingRawReader::new(AnyEncoding, input); let result = reader.next(context); // Because the input stream is exhausted, the incomplete value is illegal data and raises @@ -567,7 +570,8 @@ mod tests { } // This guarantees that there are several intermediate reading states in which the buffer // contains incomplete data that could be misinterpreted by a reader. 
- let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = StreamingRawReader::new(v1_0::Text, IonStream::new(input)); assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 0)); diff --git a/src/lazy/struct.rs b/src/lazy/struct.rs index a5e28a19..a6affb02 100644 --- a/src/lazy/struct.rs +++ b/src/lazy/struct.rs @@ -267,7 +267,7 @@ impl<'top, D: Decoder> LazyStruct<'top, D> { pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_struct.annotations(), - symbol_table: self.expanded_struct.context.symbol_table, + symbol_table: self.expanded_struct.context.symbol_table(), } } } diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs index 45197b44..93e17002 100644 --- a/src/lazy/system_reader.rs +++ b/src/lazy/system_reader.rs @@ -131,11 +131,11 @@ impl SystemReader { } pub fn symbol_table(&self) -> &SymbolTable { - self.expanding_reader.context().symbol_table + self.expanding_reader.context().symbol_table() } pub fn pending_symtab_changes(&self) -> &PendingLst { - self.expanding_reader.pending_symtab_changes() + self.expanding_reader.pending_lst() } /// Returns the next top-level stream item (IVM, Symbol Table, Value, or Nothing) as a diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index d3769417..8d23a5a3 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -4,7 +4,6 @@ use std::ops::{Range, RangeFrom, RangeTo}; use std::slice::Iter; use std::str::FromStr; -use bumpalo::Bump as BumpAllocator; use nom::branch::alt; use nom::bytes::complete::{ is_a as complete_is_a, is_not as complete_is_not, tag as complete_tag, @@ -642,19 +641,19 @@ impl<'top> TextBufferView<'top> { map(Self::match_list, |_matched_list| { // TODO: Cache child expressions found in 1.0 list let not_yet_used_in_1_0 = - 
bumpalo::collections::Vec::new_in(self.context.allocator).into_bump_slice(); + bumpalo::collections::Vec::new_in(self.context.allocator()).into_bump_slice(); EncodedTextValue::new(MatchedValue::List(not_yet_used_in_1_0)) }), map(Self::match_sexp, |_matched_sexp| { // TODO: Cache child expressions found in 1.0 sexp let not_yet_used_in_1_0 = - bumpalo::collections::Vec::new_in(self.context.allocator).into_bump_slice(); + bumpalo::collections::Vec::new_in(self.context.allocator()).into_bump_slice(); EncodedTextValue::new(MatchedValue::SExp(not_yet_used_in_1_0)) }), map(Self::match_struct, |_matched_struct| { // TODO: Cache child expressions found in 1.0 struct let not_yet_used_in_1_0 = - bumpalo::collections::Vec::new_in(self.context.allocator).into_bump_slice(); + bumpalo::collections::Vec::new_in(self.context.allocator()).into_bump_slice(); EncodedTextValue::new(MatchedValue::Struct(not_yet_used_in_1_0)) }), ))) @@ -777,22 +776,26 @@ impl<'top> TextBufferView<'top> { // Scan ahead to find the end of this list. let list_body = self.slice_to_end(1); let sequence_iter = RawTextListIterator_1_1::new(list_body); - let (span, child_exprs) = - match TextListSpanFinder_1_1::new(self.context.allocator, sequence_iter).find_span() { - Ok((span, child_exprs)) => (span, child_exprs), - // If the complete container isn't available, return an incomplete. - Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), - // If invalid syntax was encountered, return a failure to prevent nom from trying - // other parser kinds. 
- Err(e) => { - return { - let error = InvalidInputError::new(self) - .with_label("matching a v1.1 list") - .with_description(format!("{}", e)); - Err(nom::Err::Failure(IonParseError::Invalid(error))) - } + let (span, child_exprs) = match TextListSpanFinder_1_1::new( + self.context.allocator(), + sequence_iter, + ) + .find_span() + { + Ok((span, child_exprs)) => (span, child_exprs), + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. + Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label("matching a v1.1 list") + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) } - }; + } + }; // For the matched span, we use `self` again to include the opening `[` let matched = self.slice(0, span.len()); @@ -818,7 +821,7 @@ impl<'top> TextBufferView<'top> { let sexp_body = self.slice_to_end(1); let sexp_iter = RawTextSExpIterator_1_1::new(sexp_body); let (span, child_expr_cache) = - match TextSExpSpanFinder_1_1::new(self.context.allocator, sexp_iter).find_span(1) { + match TextSExpSpanFinder_1_1::new(self.context.allocator(), sexp_iter).find_span(1) { Ok((span, child_expr_cache)) => (span, child_expr_cache), // If the complete container isn't available, return an incomplete. Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), @@ -976,22 +979,26 @@ impl<'top> TextBufferView<'top> { // Scan ahead to find the end of this struct. let struct_body = self.slice_to_end(1); let struct_iter = RawTextStructIterator_1_1::new(struct_body); - let (span, fields) = - match TextStructSpanFinder_1_1::new(self.context.allocator, struct_iter).find_span() { - Ok((span, fields)) => (span, fields), - // If the complete container isn't available, return an incomplete. 
- Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), - // If invalid syntax was encountered, return a failure to prevent nom from trying - // other parser kinds. - Err(e) => { - return { - let error = InvalidInputError::new(self) - .with_label("matching a v1.1 struct") - .with_description(format!("{}", e)); - Err(nom::Err::Failure(IonParseError::Invalid(error))) - } + let (span, fields) = match TextStructSpanFinder_1_1::new( + self.context.allocator(), + struct_iter, + ) + .find_span() + { + Ok((span, fields)) => (span, fields), + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. + Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label("matching a v1.1 struct") + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) } - }; + } + }; // For the matched span, we use `self` again to include the opening `{` let matched = self.slice(0, span.len()); @@ -1019,7 +1026,7 @@ impl<'top> TextBufferView<'top> { // for `(:` plus the length of the macro ID. let initial_bytes_skipped = 2 + macro_id_bytes.len(); let (span, child_expr_cache) = - match TextSExpSpanFinder_1_1::new(self.context.allocator, sexp_iter) + match TextSExpSpanFinder_1_1::new(self.context.allocator(), sexp_iter) .find_span(initial_bytes_skipped) { Ok((span, child_expr_cache)) => (span, child_expr_cache), @@ -2222,9 +2229,7 @@ where #[cfg(test)] mod tests { - use crate::lazy::expanded::macro_table::MacroTable; use crate::lazy::expanded::EncodingContext; - use crate::SymbolTable; use rstest::rstest; use super::*; @@ -2239,18 +2244,9 @@ mod tests { /// Takes an `input` string and appends a trailing value to it, guaranteeing that the /// contents of the input are considered a complete token. 
fn new(input: &str) -> Self { - // For the sake of the unit tests, make a dummy encoding context with no lifetime - // constraints. - let macro_table_ref: &'static MacroTable = Box::leak(Box::new(MacroTable::new())); - let symbol_table_ref: &'static SymbolTable = Box::leak(Box::new(SymbolTable::new())); - let allocator_ref: &'static BumpAllocator = Box::leak(Box::new(BumpAllocator::new())); - let empty_context: EncodingContext<'static> = - EncodingContext::new(macro_table_ref, symbol_table_ref, allocator_ref); - let context: EncodingContextRef<'static> = - EncodingContextRef::new(Box::leak(Box::new(empty_context))); MatchTest { input: input.to_string(), - context, + context: EncodingContextRef::unit_test_context(), } } @@ -2904,7 +2900,8 @@ mod tests { } fn test_match_text_until_unescaped_str() { - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let input = TextBufferView::new(context, r" foo bar \''' baz''' quux ".as_bytes()); let (_remaining, (matched, contains_escapes)) = input.match_text_until_unescaped_str(r#"'''"#).unwrap(); diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index fa1a5839..8b1e2c98 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -131,7 +131,7 @@ impl<'top> MatchedFieldName<'top> { } pub fn read(&self) -> IonResult> { - self.syntax.read(self.input.context.allocator, self.input) + self.syntax.read(self.input.context.allocator(), self.input) } pub fn range(&self) -> Range { @@ -1223,7 +1223,7 @@ impl MatchedClob { mod tests { use crate::lazy::bytes_ref::BytesRef; - use crate::lazy::expanded::EncodingContextRef; + use crate::lazy::expanded::{EncodingContext, EncodingContextRef}; use crate::lazy::text::buffer::TextBufferView; use crate::{Decimal, Int, IonResult, Timestamp}; @@ -1231,7 +1231,8 @@ mod tests { fn read_ints() -> IonResult<()> { fn expect_int(data: &str, expected: impl Into) { let expected: Int = 
expected.into(); - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_int().unwrap(); let actual = matched.read(buffer).unwrap(); @@ -1265,7 +1266,8 @@ mod tests { fn read_timestamps() -> IonResult<()> { fn expect_timestamp(data: &str, expected: Timestamp) { let data = format!("{data} "); // Append a space - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_timestamp().unwrap(); let actual = matched.read(buffer).unwrap(); @@ -1367,7 +1369,8 @@ mod tests { #[test] fn read_decimals() -> IonResult<()> { fn expect_decimal(data: &str, expected: Decimal) { - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let buffer = TextBufferView::new(context, data.as_bytes()); let result = buffer.match_decimal(); assert!( @@ -1422,10 +1425,11 @@ mod tests { fn read_blobs() -> IonResult<()> { fn expect_blob(data: &str, expected: &str) { let data = format!("{data} "); // Append a space - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_blob().unwrap(); - let actual = matched.read(context.allocator, buffer).unwrap(); + let actual = matched.read(context.allocator(), buffer).unwrap(); assert_eq!( actual, expected.as_ref(), @@ -1460,11 +1464,12 @@ mod tests { // stream so the parser knows that the long-form strings are complete. We then trim // our fabricated value off of the input before reading. 
let data = format!("{data}\n0"); - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_string().unwrap(); let matched_input = buffer.slice(0, buffer.len() - 2); - let actual = matched.read(context.allocator, matched_input).unwrap(); + let actual = matched.read(context.allocator(), matched_input).unwrap(); assert_eq!( actual, expected, "Actual didn't match expected for input '{}'.\n{:?}\n!=\n{:?}", @@ -1505,7 +1510,7 @@ mod tests { // call to `match_clob()`. let (_remaining, matched) = buffer.match_clob().unwrap(); // The resulting buffer slice may be rejected during reading. - matched.read(context.allocator, buffer) + matched.read(context.allocator(), buffer) } fn expect_clob_error(context: EncodingContextRef, data: &str) { @@ -1564,7 +1569,8 @@ mod tests { ("{{\"foo\rbar\rbaz\"}}", "foo\rbar\rbaz"), ]; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); for (input, expected) in tests { expect_clob(context, input, expected); } diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 658ce5d6..de09692c 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -126,6 +126,7 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_0> for LazyRawTextReader_1_0<'da #[cfg(test)] mod tests { use crate::lazy::decoder::{HasRange, HasSpan, LazyRawFieldName, LazyRawStruct, LazyRawValue}; + use crate::lazy::expanded::EncodingContext; use crate::lazy::raw_value_ref::RawValueRef; use crate::raw_symbol_ref::AsRawSymbolRef; use crate::{Decimal, IonType, RawSymbolRef, Timestamp}; @@ -473,7 +474,8 @@ mod tests { #[test] fn ranges_and_spans() -> IonResult<()> { - let context = EncodingContextRef::unit_test_context(); + let empty_context = 
EncodingContext::empty(); + let context = empty_context.get_ref(); let data = b"foo 2024T bar::38 [1, 2, 3]"; let mut reader = LazyRawTextReader_1_0::new(data); diff --git a/src/lazy/text/raw/sequence.rs b/src/lazy/text/raw/sequence.rs index f6372a6d..77bfa373 100644 --- a/src/lazy/text/raw/sequence.rs +++ b/src/lazy/text/raw/sequence.rs @@ -1,4 +1,5 @@ #![allow(non_camel_case_types)] + use std::fmt; use std::fmt::{Debug, Formatter}; use std::ops::Range; @@ -301,13 +302,13 @@ impl<'a> Debug for LazyRawTextSExp_1_0<'a> { mod tests { use std::ops::Range; + use crate::lazy::expanded::EncodingContext; use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::IonResult; - use crate::lazy::expanded::EncodingContextRef; - fn expect_sequence_range(ion_data: &str, expected: Range) -> IonResult<()> { - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let reader = &mut LazyRawTextReader_1_0::new(ion_data.as_bytes()); let value = reader.next(context)?.expect_value()?; let actual_range = value.data_range(); diff --git a/src/lazy/text/raw/struct.rs b/src/lazy/text/raw/struct.rs index 6ab97ea5..31ed46c0 100644 --- a/src/lazy/text/raw/struct.rs +++ b/src/lazy/text/raw/struct.rs @@ -158,12 +158,13 @@ mod tests { use std::ops::Range; use crate::lazy::decoder::{HasRange, HasSpan, LazyRawStruct, LazyRawValue}; - use crate::lazy::expanded::EncodingContextRef; + use crate::lazy::expanded::EncodingContext; use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::IonResult; fn expect_struct_range(ion_data: &str, expected: Range) -> IonResult<()> { - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let reader = &mut LazyRawTextReader_1_0::new(ion_data.as_bytes()); let value = reader.next(context)?.expect_value()?; let actual_range = value.data_range(); @@ -230,7 +231,8 @@ mod 
tests { ), ]; for (input, field_name_ranges) in tests { - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawTextReader_1_0::new(input.as_bytes()); let struct_ = reader .next(context)? diff --git a/src/lazy/text/raw/v1_1/reader.rs b/src/lazy/text/raw/v1_1/reader.rs index de094d40..164e3ccf 100644 --- a/src/lazy/text/raw/v1_1/reader.rs +++ b/src/lazy/text/raw/v1_1/reader.rs @@ -741,6 +741,7 @@ impl<'top> TextStructSpanFinder_1_1<'top> { #[cfg(test)] mod tests { + use crate::lazy::expanded::EncodingContext; use crate::lazy::raw_value_ref::RawValueRef; use super::*; @@ -775,7 +776,8 @@ mod tests { false "#; - let context = EncodingContextRef::unit_test_context(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let reader = &mut LazyRawTextReader_1_1::new(data.as_bytes()); // $ion_1_1 diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index 27e75f5b..a400b613 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -197,7 +197,7 @@ impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for LazyRawTextValue<'to fn read(&self) -> IonResult> { // Get the value's matched input, skipping over any annotations let matched_input = self.input.slice_to_end(self.encoded_value.data_offset()); - let allocator = self.input.context.allocator; + let allocator = self.input.context.allocator(); use crate::lazy::text::matched::MatchedValue::*; let value_ref = match self.encoded_value.matched() { @@ -258,7 +258,7 @@ impl<'top> Iterator for RawTextAnnotationsIterator<'top> { let matched_input = self .input .slice(span.start - self.input.offset(), span.len()); - let text = match symbol.read(self.input.context.allocator, matched_input) { + let text = match symbol.read(self.input.context.allocator(), matched_input) { Ok(text) => text, Err(e) => { self.has_returned_error = true; @@ -272,7 +272,7 @@ impl<'top> 
Iterator for RawTextAnnotationsIterator<'top> { #[cfg(test)] mod tests { - use crate::lazy::expanded::EncodingContextRef; + use crate::lazy::expanded::EncodingContext; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::value::RawTextAnnotationsIterator; use crate::{IonResult, RawSymbolRef}; @@ -280,7 +280,8 @@ mod tests { #[test] fn iterate_annotations() -> IonResult<()> { fn test(input: &str) -> IonResult<()> { - let context = EncodingContextRef::unit_test_context(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let input = TextBufferView::new(context, input.as_bytes()); let mut iter = RawTextAnnotationsIterator::new(input); assert_eq!(iter.next().unwrap()?, RawSymbolRef::Text("foo")); diff --git a/src/lazy/value.rs b/src/lazy/value.rs index 02947d3f..ec38c90a 100644 --- a/src/lazy/value.rs +++ b/src/lazy/value.rs @@ -68,13 +68,13 @@ impl<'top, D: Decoder> LazyValue<'top, D> { #[cfg(feature = "experimental-tooling-apis")] pub fn symbol_table(&self) -> &SymbolTable { - self.expanded_value.context.symbol_table + self.expanded_value.context.symbol_table() } // When the `experimental-tooling-apis` feature is disabled, this method is `pub(crate)` #[cfg(not(feature = "experimental-tooling-apis"))] pub(crate) fn symbol_table(&self) -> &SymbolTable { - self.expanded_value.context.symbol_table + self.expanded_value.context.symbol_table() } /// Returns the [`IonType`] of this value. 
@@ -221,7 +221,7 @@ impl<'top, D: Decoder> LazyValue<'top, D> { pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_value.annotations(), - symbol_table: self.expanded_value.context.symbol_table, + symbol_table: self.expanded_value.context.symbol_table(), } } @@ -275,7 +275,7 @@ impl<'top, D: Decoder> LazyValue<'top, D> { RawSymbolRef::SymbolId(sid) => self .expanded_value .context - .symbol_table + .symbol_table() .symbol_for(sid) .ok_or_else(|| { IonError::decoding_error(format!( From 0f8c894f6ed3e47093ab8ed0aa03b71f23d28287 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Mon, 10 Jun 2024 11:56:14 -0400 Subject: [PATCH 13/15] wires up binary macro evaluation --- src/lazy/any_encoding.rs | 47 +++++-- src/lazy/binary/raw/v1_1/e_expression.rs | 2 +- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 127 +++++++++++++++++-- src/lazy/binary/raw/v1_1/type_code.rs | 4 +- src/lazy/binary/raw/v1_1/type_descriptor.rs | 2 + src/lazy/expanded/macro_evaluator.rs | 2 +- src/lazy/expanded/macro_table.rs | 4 + src/lazy/reader.rs | 104 ++++++++++++++- 8 files changed, 261 insertions(+), 31 deletions(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 0ca2d483..606ec5b8 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -194,6 +194,13 @@ impl<'top> From> for LazyRawAnyEExpression<'top> { } } } +impl<'top> From> for LazyRawAnyEExpression<'top> { + fn from(binary_invocation: RawBinaryEExpression_1_1<'top>) -> Self { + LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Binary_1_1(binary_invocation), + } + } +} impl<'top> HasSpan<'top> for LazyRawAnyEExpression<'top> { fn span(&self) -> Span<'top> { @@ -222,21 +229,19 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { use LazyRawAnyEExpressionKind::*; match self.encoding { Text_1_1(ref m) => m.id(), - Binary_1_1(_) => { - todo!("macros in binary Ion 1.1 are not implemented") - } + 
Binary_1_1(ref m) => m.id(), } } fn raw_arguments(&self) -> Self::RawArgumentsIterator<'_> { use LazyRawAnyEExpressionKind::*; match self.encoding { - Text_1_1(m) => LazyRawAnyMacroArgsIterator { - encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.raw_arguments()), + Text_1_1(e) => LazyRawAnyMacroArgsIterator { + encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(e.raw_arguments()), + }, + Binary_1_1(e) => LazyRawAnyMacroArgsIterator { + encoding: LazyRawAnyMacroArgsIteratorKind::Binary_1_1(e.raw_arguments()), }, - Binary_1_1(_) => { - todo!("macros in binary Ion 1.1 are not yet implemented") - } } } } @@ -248,6 +253,12 @@ pub enum LazyRawAnyMacroArgsIteratorKind<'top> { TextEncoding_1_1, >>::RawArgumentsIterator<'top>, ), + Binary_1_1( + as RawEExpression< + 'top, + BinaryEncoding_1_1, + >>::RawArgumentsIterator<'top>, + ), } pub struct LazyRawAnyMacroArgsIterator<'top> { encoding: LazyRawAnyMacroArgsIteratorKind<'top>, @@ -257,8 +268,8 @@ impl<'top> Iterator for LazyRawAnyMacroArgsIterator<'top> { type Item = IonResult>; fn next(&mut self) -> Option { - match self.encoding { - LazyRawAnyMacroArgsIteratorKind::Text_1_1(mut iter) => match iter.next() { + match &mut self.encoding { + LazyRawAnyMacroArgsIteratorKind::Text_1_1(ref mut iter) => match iter.next() { Some(Ok(RawValueExpr::ValueLiteral(value))) => { Some(Ok(RawValueExpr::ValueLiteral(LazyRawAnyValue::from(value)))) } @@ -270,6 +281,18 @@ impl<'top> Iterator for LazyRawAnyMacroArgsIterator<'top> { Some(Err(e)) => Some(Err(e)), None => None, }, + LazyRawAnyMacroArgsIteratorKind::Binary_1_1(ref mut iter) => match iter.next() { + Some(Ok(RawValueExpr::ValueLiteral(value))) => { + Some(Ok(RawValueExpr::ValueLiteral(LazyRawAnyValue::from(value)))) + } + Some(Ok(RawValueExpr::EExp(invocation))) => { + Some(Ok(RawValueExpr::EExp(LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Binary_1_1(invocation), + }))) + } + Some(Err(e)) => Some(Err(e)), + None => None, + }, } } } @@ -723,8 +746,8 @@ 
impl<'top> From> LazyRawStreamItem::::Value(value) => { LazyRawStreamItem::::Value(value.into()) } - LazyRawStreamItem::::EExpression(_) => { - todo!("Macro invocations not yet implemented in binary 1.1") + LazyRawStreamItem::::EExpression(eexp) => { + LazyRawStreamItem::::EExpression(eexp.into()) } LazyRawStreamItem::::EndOfStream(end) => { LazyRawStreamItem::::EndOfStream(end) diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs index d768da16..55a29cc8 100644 --- a/src/lazy/binary/raw/v1_1/e_expression.rs +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -77,7 +77,7 @@ impl<'top> RawEExpression<'top, v1_1::Binary> for RawBinaryEExpression_1_1<'top> } } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone)] pub struct RawBinarySequenceCacheIterator_1_1<'top> { child_exprs: &'top [LazyRawValueExpr<'top, v1_1::Binary>], index: usize, diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 74583201..a91ef141 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -462,7 +462,7 @@ impl<'a> ImmutableBuffer<'a> { MacroIdRef::LocalAddress(opcode.byte as usize), self.consume(1), ), - EExpressionAddressFollows => todo!("e-expr with trailing address"), + EExpressionAddressFollows => todo!("e-expr with trailing address; {opcode:#0x?}",), _ => unreachable!("read_e_expression called with invalid opcode"), }; @@ -476,6 +476,7 @@ impl<'a> ImmutableBuffer<'a> { .ok_or_else(|| { IonError::decoding_error(format!("invocation of unknown macro '{macro_id:?}'")) })?; + println!("{macro_def:?}"); use MacroKind::*; let num_parameters = match macro_def.kind() { Template(t) => t.signature().parameters().len(), @@ -483,13 +484,14 @@ impl<'a> ImmutableBuffer<'a> { _ => todo!("system macros require support for argument group encoding"), }; - let mut args_buffer = buffer_after_id; let args_cache = self .context .allocator() .alloc_with(|| 
BumpVec::with_capacity_in(num_parameters, self.context.allocator())); + // `args_buffer` will be partially consumed in each iteration of the loop below. + let mut args_buffer = buffer_after_id; for _ in 0..num_parameters { - let value_expr = match buffer_after_id.peek_sequence_value_expr()? { + let value_expr = match args_buffer.peek_sequence_value_expr()? { Some(expr) => expr, None => { return IonResult::incomplete( @@ -502,7 +504,7 @@ impl<'a> ImmutableBuffer<'a> { args_cache.push(value_expr); } let macro_id_encoded_length = buffer_after_id.offset() - self.offset(); - let args_length = args_buffer.offset() - buffer_after_id.offset(); + let args_length = args_buffer.offset() + args_buffer.len() - buffer_after_id.offset(); let e_expression_buffer = self.slice(0, macro_id_encoded_length + args_length); let e_expression = RawBinaryEExpression_1_1::new( @@ -536,7 +538,10 @@ pub struct EncodedAnnotations { #[cfg(test)] mod tests { use super::*; + use crate::lazy::expanded::compiler::TemplateCompiler; + use crate::lazy::expanded::macro_evaluator::RawEExpression; use crate::lazy::expanded::EncodingContext; + use crate::lazy::text::raw::v1_1::reader::MacroAddress; fn input_test>(input: A) { let empty_context = EncodingContext::empty(); @@ -587,10 +592,116 @@ mod tests { assert_eq!(pad_size, 4); } + fn eexp_test( + macro_source: &str, + encode_macro_fn: impl FnOnce(MacroAddress) -> Vec, + test_fn: impl FnOnce(RawBinaryEExpression_1_1) -> IonResult<()>, + ) -> IonResult<()> { + let mut context = EncodingContext::empty(); + let template_macro = TemplateCompiler::compile_from_text(context.get_ref(), macro_source)?; + let macro_address = context.macro_table.add_macro(template_macro)?; + let opcode_byte = u8::try_from(macro_address).unwrap(); + let binary_ion = encode_macro_fn(opcode_byte as usize); + let buffer = ImmutableBuffer::new(context.get_ref(), &binary_ion); + let eexp = buffer.read_e_expression(Opcode::from_byte(opcode_byte))?; + assert_eq!(eexp.id(), 
MacroIdRef::LocalAddress(macro_address)); + println!("{:?}", eexp); + assert_eq!(eexp.id, MacroIdRef::LocalAddress(opcode_byte as usize)); + test_fn(eexp) + } + #[test] - fn read_e_expressions() { - let empty_context = EncodingContext::empty(); - let context = empty_context.get_ref(); - // let eexp = + fn read_eexp_without_args() -> IonResult<()> { + let macro_source = r#" + (macro seventeen () 17) + "#; + let encode_eexp_fn = |address: MacroAddress| vec![address as u8]; + eexp_test( + macro_source, + encode_eexp_fn, + |eexp: RawBinaryEExpression_1_1| { + let mut args = eexp.raw_arguments(); + assert!(args.next().is_none()); + Ok(()) + }, + ) + } + + #[test] + fn read_eexp_with_one_arg() -> IonResult<()> { + let macro_source = r#" + (macro greet (name) + (make_string "Hello, " name "!") + ) + "#; + + #[rustfmt::skip] + let encode_eexp_fn = |address: MacroAddress| vec![ + address as u8, + // === 8-byte string ==== + 0x98, + // M i c h e l l e + 0x4D, 0x69, 0x63, 0x68, 0x65, 0x6C, 0x6C, 0x65, + ]; + + let args_test = |eexp: RawBinaryEExpression_1_1| { + let mut args = eexp.raw_arguments(); + assert_eq!( + args.next() + .unwrap()? + .expect_value()? + .read()? + .expect_string()?, + "Michelle" + ); + Ok(()) + }; + + eexp_test(macro_source, encode_eexp_fn, args_test) + } + + #[test] + fn read_eexp_with_two_args() -> IonResult<()> { + let macro_source = r#" + (macro greet (name day) + (make_string "Hello, " name "! Have a pleasant " day ".") + ) + "#; + + #[rustfmt::skip] + let encode_eexp_fn = |address: MacroAddress| vec![ + address as u8, + // === 8-byte string ==== + 0x98, + // M i c h e l l e + 0x4D, 0x69, 0x63, 0x68, 0x65, 0x6C, 0x6C, 0x65, + // === 7-byte string === + 0x97, + // T u e s d a y + 0x54, 0x75, 0x65, 0x73, 0x64, 0x61, 0x79, + ]; + + let args_test = |eexp: RawBinaryEExpression_1_1| { + let mut args = eexp.raw_arguments(); + assert_eq!( + args.next() + .unwrap()? + .expect_value()? + .read()? 
+ .expect_string()?, + "Michelle" + ); + assert_eq!( + args.next() + .unwrap()? + .expect_value()? + .read()? + .expect_string()?, + "Tuesday" + ); + Ok(()) + }; + + eexp_test(macro_source, encode_eexp_fn, args_test) } } diff --git a/src/lazy/binary/raw/v1_1/type_code.rs b/src/lazy/binary/raw/v1_1/type_code.rs index 41c44a2c..ff91f24f 100644 --- a/src/lazy/binary/raw/v1_1/type_code.rs +++ b/src/lazy/binary/raw/v1_1/type_code.rs @@ -13,8 +13,8 @@ use crate::IonType; /// * Whether the next type code is reserved. #[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum OpcodeType { - EExpressionWithAddress, // 0x00-0x4F - - EExpressionAddressFollows, // 0x40-0x4F - + EExpressionWithAddress, // 0x00-0x50 - + EExpressionAddressFollows, // 0x50-0x5F - Integer, // 0x60-0x68 - Integer up to 8 bytes wide Float, // 0x6A-0x6D - Boolean, // 0x6E-0x6F - diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 56645510..f68c129e 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -58,6 +58,8 @@ impl Opcode { use OpcodeType::*; let (opcode_type, length_code, ion_type) = match (high_nibble, low_nibble) { + (0x0..=0x4, _) => (EExpressionWithAddress, low_nibble, None), + (0x5, _) => (EExpressionAddressFollows, low_nibble, None), (0x6, 0x0..=0x8) => (Integer, low_nibble, Some(IonType::Int)), (0x6, 0xA..=0xD) => (Float, low_nibble, Some(IonType::Float)), (0x6, 0xE..=0xF) => (Boolean, low_nibble, Some(IonType::Bool)), diff --git a/src/lazy/expanded/macro_evaluator.rs b/src/lazy/expanded/macro_evaluator.rs index acc7c596..df6cec59 100644 --- a/src/lazy/expanded/macro_evaluator.rs +++ b/src/lazy/expanded/macro_evaluator.rs @@ -308,7 +308,7 @@ impl<'top, D: Decoder> MacroEvaluator<'top, D> { let capacity_hint = num_args_hint.1.unwrap_or(num_args_hint.0); let mut args = BumpVec::with_capacity_in(capacity_hint, allocator); - for arg in invocation.arguments(self.environment()) { + for 
arg in args_iter { args.push(arg?); } let environment = Environment::new(args); diff --git a/src/lazy/expanded/macro_table.rs b/src/lazy/expanded/macro_table.rs index 37628f69..236c3276 100644 --- a/src/lazy/expanded/macro_table.rs +++ b/src/lazy/expanded/macro_table.rs @@ -85,6 +85,10 @@ impl MacroTable { } } + pub fn len(&self) -> usize { + self.macros_by_address.len() + } + pub fn macro_with_id(&'_ self, id: MacroIdRef<'_>) -> Option> { match id { MacroIdRef::LocalName(name) => self.macro_with_name(name), diff --git a/src/lazy/reader.rs b/src/lazy/reader.rs index bec2a901..540999ac 100644 --- a/src/lazy/reader.rs +++ b/src/lazy/reader.rs @@ -1,16 +1,15 @@ #![allow(non_camel_case_types)] -use crate::element::reader::ElementReader; +use crate::{IonError, IonResult}; use crate::element::Element; +use crate::element::reader::ElementReader; use crate::lazy::decoder::Decoder; -use crate::lazy::encoding::TextEncoding_1_1; use crate::lazy::streaming_raw_reader::IonInput; use crate::lazy::system_reader::SystemReader; use crate::lazy::text::raw::v1_1::reader::MacroAddress; use crate::lazy::value::LazyValue; use crate::read_config::ReadConfig; use crate::result::IonFailure; -use crate::{IonError, IonResult}; /// A binary reader that only reads each value that it visits upon request (that is: lazily). /// @@ -127,8 +126,10 @@ impl Reader { } } -impl Reader { - // Temporary method for defining/testing templates. +impl Reader { + // Temporary method for defining/testing templates. This method does not confirm that the + // reader's encoding supports macros--that check will happen when encoding directives are + // supported. // TODO: Remove this when the reader can understand 1.1 encoding directives. 
pub fn register_template(&mut self, template_definition: &str) -> IonResult { self.system_reader @@ -174,13 +175,14 @@ impl ElementReader for Reader Vec, + test_fn: impl FnOnce(Reader) -> IonResult<()>, + ) -> IonResult<()> { + // Because readers do not yet understand encoding directives, we'll pre-calculate the + // macro ID that will be assigned. Make an empty encoding context... + let context = EncodingContext::empty(); + // ...and see how many macros it contains. This will change as development continues. + let macro_address = context.macro_table.len(); + let opcode_byte = u8::try_from(macro_address).unwrap(); + // Using that ID, encode a binary stream containing an invocation of the new macro. + // This function must add an IVM and the encoded e-expression ID, followed by any number + // of arguments that matches the provided signature. + let binary_ion = encode_macro_fn(opcode_byte as usize); + // Construct a reader for the encoded data. + let mut reader = Reader::new(AnyEncoding, binary_ion.as_slice())?; + // Register the template definition, getting the same ID we used earlier. + let actual_address = reader.register_template(macro_source)?; + assert_eq!(macro_address, actual_address, "Assigned macro address did not match expected address."); + // Use the provided test function to confirm that the data expands to the expected stream. 
+ test_fn(reader) + } + + #[test] + fn expand_binary_template_macro() -> IonResult<()> { + let macro_source = "(macro seventeen () 17)"; + let encode_macro_fn = |address| vec![0xE0, 0x01, 0x01, 0xEA, address as u8]; + expand_macro_test(macro_source, encode_macro_fn, |mut reader| { + assert_eq!(reader.expect_next()?.read()?.expect_i64()?, 17); + Ok(()) + }) + } + + #[test] + fn expand_binary_template_macro_with_one_arg() -> IonResult<()> { + let macro_source = r#" + (macro greet (name) + (make_string "Hello, " name "!") + ) + "#; + #[rustfmt::skip] + let encode_macro_fn = |address| vec![ + // === 1.1 IVM === + 0xE0, 0x01, 0x01, 0xEA, + // === Macro ID === + address as u8, + // === Arg 1 === + // 8-byte string + 0x98, + // M i c h e l l e + 0x4D, 0x69, 0x63, 0x68, 0x65, 0x6C, 0x6C, 0x65, + ]; + expand_macro_test(macro_source, encode_macro_fn, |mut reader| { + assert_eq!(reader.expect_next()?.read()?.expect_string()?, "Hello, Michelle!"); + Ok(()) + }) + } + + #[test] + fn expand_binary_template_macro_with_multiple_outputs() -> IonResult<()> { + let macro_source = r#" + (macro questions (food) + (values + (make_string "What color is a " food "?") + (make_string "How much potassium is in a " food "?") + (make_string "What wine should I pair with a " food "?"))) + "#; + #[rustfmt::skip] + let encode_macro_fn = |address| vec![ + // === 1.1 IVM === + 0xE0, 0x01, 0x01, 0xEA, + // === Macro ID === + address as u8, + // === Arg 1 === + // 6-byte string + 0x96, + // b a n a n a + 0x62, 0x61, 0x6E, 0x61, 0x6E, 0x61 + ]; + expand_macro_test(macro_source, encode_macro_fn, |mut reader| { + assert_eq!(reader.expect_next()?.read()?.expect_string()?, "What color is a banana?"); + assert_eq!(reader.expect_next()?.read()?.expect_string()?, "How much potassium is in a banana?"); + assert_eq!(reader.expect_next()?.read()?.expect_string()?, "What wine should I pair with a banana?"); + Ok(()) + }) + } } From afc58cf1af0aec7fc0042362ad97a28fc6bf744f Mon Sep 17 00:00:00 2001 From: Zack 
Slayton Date: Mon, 10 Jun 2024 14:23:49 -0400 Subject: [PATCH 14/15] cleanup --- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 5 ++--- src/lazy/expanded/mod.rs | 7 ------- src/lazy/text/buffer.rs | 4 +++- src/lazy/text/raw/reader.rs | 3 ++- 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index a91ef141..5cb60868 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -467,8 +467,8 @@ impl<'a> ImmutableBuffer<'a> { }; // TODO: When we support untagged parameter encodings, we need to use the signature's - // parameter encodings to drive this process. For now (while everything is tagged) - // and cardinality is always required, we just loop `n` times. + // parameter encodings to drive this process. For now--while everything is tagged + // and cardinality is always required--we just loop `num_parameters` times. let macro_def = self .context .macro_table @@ -476,7 +476,6 @@ impl<'a> ImmutableBuffer<'a> { .ok_or_else(|| { IonError::decoding_error(format!("invocation of unknown macro '{macro_id:?}'")) })?; - println!("{macro_def:?}"); use MacroKind::*; let num_parameters = match macro_def.kind() { Template(t) => t.signature().parameters().len(), diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index 03ef4b00..ad98a22f 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -129,13 +129,6 @@ impl<'top> EncodingContextRef<'top> { Self { context } } - #[cfg(test)] - pub fn unit_test_context() -> EncodingContextRef<'static> { - // For the sake of the unit tests, make a dummy encoding context with no lifetime - // constraints. 
- EncodingContextRef::new(Box::leak(Box::new(EncodingContext::empty()))) - } - pub fn allocator(&self) -> &'top BumpAllocator { &self.context.allocator } diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 8d23a5a3..c7f415b1 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -2246,7 +2246,9 @@ mod tests { fn new(input: &str) -> Self { MatchTest { input: input.to_string(), - context: EncodingContextRef::unit_test_context(), + // This uses `leak` to get an `EncodingContextRef` with a `static` lifetime + // for the sake of unit test simplicity. + context: EncodingContextRef::new(Box::leak(Box::new(EncodingContext::empty()))), } } diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index de09692c..75f868b6 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -298,9 +298,10 @@ mod tests { "#, ); + let encoding_context = EncodingContext::empty(); let reader = &mut TestReader { reader: LazyRawTextReader_1_0::new(data.as_bytes()), - context: EncodingContextRef::unit_test_context(), + context: encoding_context.get_ref(), }; assert_eq!(reader.next()?.expect_ivm()?.version(), (1, 0)); From b1964617fa29bf336ddc816027d7b161da7c287a Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Mon, 10 Jun 2024 14:24:03 -0400 Subject: [PATCH 15/15] cargo fmt --- src/lazy/reader.rs | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/lazy/reader.rs b/src/lazy/reader.rs index 540999ac..2978bc97 100644 --- a/src/lazy/reader.rs +++ b/src/lazy/reader.rs @@ -1,8 +1,7 @@ #![allow(non_camel_case_types)] -use crate::{IonError, IonResult}; -use crate::element::Element; use crate::element::reader::ElementReader; +use crate::element::Element; use crate::lazy::decoder::Decoder; use crate::lazy::streaming_raw_reader::IonInput; use crate::lazy::system_reader::SystemReader; @@ -10,6 +9,7 @@ use crate::lazy::text::raw::v1_1::reader::MacroAddress; use 
crate::lazy::value::LazyValue; use crate::read_config::ReadConfig; use crate::result::IonFailure; +use crate::{IonError, IonResult}; /// A binary reader that only reads each value that it visits upon request (that is: lazily). /// @@ -175,14 +175,14 @@ impl ElementReader for Reader