Skip to content

Commit 583006a

Browse files
gendx, chyyran
authored and committed
Reduce unnecessary allocations and indirections
* Changed literal_probs array from a Vec<Vec<u16>> to a Vec2D backed by a contiguous allocation
* BitTrees in LenDecoder and DecoderState are now stored inline. The actual BitTree data still lives in a Vec but one level of indirection is reduced.
* Don't bother with filling stack-allocated DecoderState arrays on reset, and just recreate the arrays dropping the existing ones.

# Conflicts:
# src/decode/lzma.rs
2 parents a24679d + 2ea7463 commit 583006a

File tree

12 files changed

+251
-56
lines changed

12 files changed

+251
-56
lines changed

.github/workflows/format.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
on: [push, pull_request]
2-
name: Formatting on stable toolchain
2+
name: Formatting on nightly toolchain
33
jobs:
44
format:
55
runs-on: ubuntu-latest
66
steps:
77
- uses: actions/checkout@v2
8-
- uses: dtolnay/rust-toolchain@stable
8+
- uses: dtolnay/rust-toolchain@nightly
99
with:
1010
components: rustfmt
1111

rustfmt.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
imports_granularity = "Module"

src/decode/lzma.rs

Lines changed: 42 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer};
2-
use crate::decode::rangecoder;
3-
use crate::decode::rangecoder::RangeDecoder;
4-
use crate::decompress::Options;
5-
use crate::decompress::UnpackedSize;
2+
use crate::decode::rangecoder::{BitTree, LenDecoder, RangeDecoder};
3+
use crate::decompress::{Options, UnpackedSize};
64
use crate::error;
5+
use crate::util::vec2d::Vec2D;
76
use byteorder::{LittleEndian, ReadBytesExt};
87
use std::io;
98

@@ -167,9 +166,9 @@ pub(crate) struct DecoderState {
167166
partial_input_buf: std::io::Cursor<[u8; MAX_REQUIRED_INPUT]>,
168167
pub(crate) lzma_props: LzmaProperties,
169168
unpacked_size: Option<u64>,
170-
literal_probs: Vec<Vec<u16>>,
171-
pos_slot_decoder: Vec<rangecoder::BitTree>,
172-
align_decoder: rangecoder::BitTree,
169+
literal_probs: Vec2D<u16>,
170+
pos_slot_decoder: [BitTree; 4],
171+
align_decoder: BitTree,
173172
pos_decoders: [u16; 115],
174173
is_match: [u16; 192], // true = LZ, false = literal
175174
is_rep: [u16; 12],
@@ -179,8 +178,8 @@ pub(crate) struct DecoderState {
179178
is_rep_0long: [u16; 192],
180179
state: usize,
181180
rep: [usize; 4],
182-
len_decoder: rangecoder::LenDecoder,
183-
rep_len_decoder: rangecoder::LenDecoder,
181+
len_decoder: LenDecoder,
182+
rep_len_decoder: LenDecoder,
184183
}
185184

186185
impl DecoderState {
@@ -190,9 +189,14 @@ impl DecoderState {
190189
partial_input_buf: std::io::Cursor::new([0; MAX_REQUIRED_INPUT]),
191190
lzma_props,
192191
unpacked_size,
193-
literal_probs: vec![vec![0x400; 0x300]; 1 << (lzma_props.lc + lzma_props.lp)],
194-
pos_slot_decoder: vec![rangecoder::BitTree::new(6); 4],
195-
align_decoder: rangecoder::BitTree::new(4),
192+
literal_probs: Vec2D::init(0x400, (1 << (lzma_props.lc + lzma_props.lp), 0x300)),
193+
pos_slot_decoder: [
194+
BitTree::new(6),
195+
BitTree::new(6),
196+
BitTree::new(6),
197+
BitTree::new(6),
198+
],
199+
align_decoder: BitTree::new(4),
196200
pos_decoders: [0x400; 115],
197201
is_match: [0x400; 192],
198202
is_rep: [0x400; 12],
@@ -202,33 +206,36 @@ impl DecoderState {
202206
is_rep_0long: [0x400; 192],
203207
state: 0,
204208
rep: [0; 4],
205-
len_decoder: rangecoder::LenDecoder::new(),
206-
rep_len_decoder: rangecoder::LenDecoder::new(),
209+
len_decoder: LenDecoder::new(),
210+
rep_len_decoder: LenDecoder::new(),
207211
}
208212
}
209213

210214
pub fn reset_state(&mut self, new_props: LzmaProperties) {
211215
new_props.validate();
212216
if self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp {
213217
// We can reset here by filling the existing buffer with 0x400.
214-
self.literal_probs.iter_mut().for_each(|v| v.fill(0x400))
218+
self.literal_probs.fill(0x400);
215219
} else {
216220
// We need to reallocate because of the new size of `lc+lp`.
217-
self.literal_probs = vec![vec![0x400; 0x300]; 1 << (new_props.lc + new_props.lp)];
221+
self.literal_probs = Vec2D::init(0x400, (1 << (new_props.lc + new_props.lp), 0x300));
218222
}
219223

220224
self.lzma_props = new_props;
221225
self.pos_slot_decoder.iter_mut().for_each(|t| t.reset());
222226
self.align_decoder.reset();
223-
self.pos_decoders.fill(0x400);
224-
self.is_match.fill(0x400);
225-
self.is_rep.fill(0x400);
226-
self.is_rep_g0.fill(0x400);
227-
self.is_rep_g1.fill(0x400);
228-
self.is_rep_g2.fill(0x400);
229-
self.is_rep_0long.fill(0x400);
227+
// For stack-allocated arrays, it was found to be faster to re-create new arrays
228+
// dropping the existing ones, rather than using `fill` to reset the contents to their initial values.
229+
// Heap-based arrays use fill to keep their allocation rather than reallocate.
230+
self.pos_decoders = [0x400; 115];
231+
self.is_match = [0x400; 192];
232+
self.is_rep = [0x400; 12];
233+
self.is_rep_g0 = [0x400; 12];
234+
self.is_rep_g1 = [0x400; 12];
235+
self.is_rep_g2 = [0x400; 12];
236+
self.is_rep_0long = [0x400; 192];
230237
self.state = 0;
231-
self.rep.fill(0);
238+
self.rep = [0; 4];
232239
self.len_decoder.reset();
233240
self.rep_len_decoder.reset();
234241
}
@@ -240,7 +247,7 @@ impl DecoderState {
240247
pub fn process<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
241248
&mut self,
242249
output: &mut LZB,
243-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
250+
rangecoder: &mut RangeDecoder<'a, R>,
244251
) -> error::Result<()> {
245252
self.process_mode(output, rangecoder, ProcessingMode::Finish)
246253
}
@@ -249,7 +256,7 @@ impl DecoderState {
249256
pub fn process_stream<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
250257
&mut self,
251258
output: &mut LZB,
252-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
259+
rangecoder: &mut RangeDecoder<'a, R>,
253260
) -> error::Result<()> {
254261
self.process_mode(output, rangecoder, ProcessingMode::Partial)
255262
}
@@ -263,7 +270,7 @@ impl DecoderState {
263270
fn process_next_inner<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
264271
&mut self,
265272
output: &mut LZB,
266-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
273+
rangecoder: &mut RangeDecoder<'a, R>,
267274
update: bool,
268275
) -> error::Result<ProcessingStatus> {
269276
let pos_state = output.len() & ((1 << self.lzma_props.pb) - 1);
@@ -380,7 +387,7 @@ impl DecoderState {
380387
fn process_next<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
381388
&mut self,
382389
output: &mut LZB,
383-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
390+
rangecoder: &mut RangeDecoder<'a, R>,
384391
) -> error::Result<ProcessingStatus> {
385392
self.process_next_inner(output, rangecoder, true)
386393
}
@@ -398,15 +405,15 @@ impl DecoderState {
398405
code: u32,
399406
) -> error::Result<()> {
400407
let mut temp = std::io::Cursor::new(buf);
401-
let mut rangecoder = rangecoder::RangeDecoder::from_parts(&mut temp, range, code);
408+
let mut rangecoder = RangeDecoder::from_parts(&mut temp, range, code);
402409
let _ = self.process_next_inner(output, &mut rangecoder, false)?;
403410
Ok(())
404411
}
405412

406413
/// Utility function to read data into the partial input buffer.
407414
fn read_partial_input_buf<'a, R: io::BufRead>(
408415
&mut self,
409-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
416+
rangecoder: &mut RangeDecoder<'a, R>,
410417
) -> error::Result<()> {
411418
// Fill as much of the tmp buffer as possible
412419
let start = self.partial_input_buf.position() as usize;
@@ -420,7 +427,7 @@ impl DecoderState {
420427
fn process_mode<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
421428
&mut self,
422429
output: &mut LZB,
423-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
430+
rangecoder: &mut RangeDecoder<'a, R>,
424431
mode: ProcessingMode,
425432
) -> error::Result<()> {
426433
loop {
@@ -461,11 +468,8 @@ impl DecoderState {
461468
// Run the decompressor on the tmp buffer
462469
let mut tmp_reader =
463470
io::Cursor::new(&tmp[..self.partial_input_buf.position() as usize]);
464-
let mut tmp_rangecoder = rangecoder::RangeDecoder::from_parts(
465-
&mut tmp_reader,
466-
rangecoder.range,
467-
rangecoder.code,
468-
);
471+
let mut tmp_rangecoder =
472+
RangeDecoder::from_parts(&mut tmp_reader, rangecoder.range, rangecoder.code);
469473
let res = self.process_next(output, &mut tmp_rangecoder)?;
470474

471475
// Update the actual rangecoder
@@ -514,7 +518,7 @@ impl DecoderState {
514518
fn decode_literal<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
515519
&mut self,
516520
output: &mut LZB,
517-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
521+
rangecoder: &mut RangeDecoder<'a, R>,
518522
update: bool,
519523
) -> error::Result<u8> {
520524
let def_prev_byte = 0u8;
@@ -550,7 +554,7 @@ impl DecoderState {
550554

551555
fn decode_distance<'a, R: io::BufRead>(
552556
&mut self,
553-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
557+
rangecoder: &mut RangeDecoder<'a, R>,
554558
length: usize,
555559
update: bool,
556560
) -> error::Result<usize> {

src/decode/lzma2.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
use crate::decode::lzbuffer;
21
use crate::decode::lzbuffer::LzBuffer;
3-
use crate::decode::lzma::DecoderState;
4-
use crate::decode::lzma::LzmaProperties;
5-
use crate::decode::rangecoder;
2+
use crate::decode::lzma::{DecoderState, LzmaProperties};
3+
use crate::decode::{lzbuffer, rangecoder};
64
use crate::error;
75
use byteorder::{BigEndian, ReadBytesExt};
86
use std::io;

src/decode/rangecoder.rs

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ impl BitTree {
190190
pub struct LenDecoder {
191191
choice: u16,
192192
choice2: u16,
193-
low_coder: Vec<BitTree>,
194-
mid_coder: Vec<BitTree>,
193+
low_coder: [BitTree; 16],
194+
mid_coder: [BitTree; 16],
195195
high_coder: BitTree,
196196
}
197197

@@ -200,8 +200,42 @@ impl LenDecoder {
200200
LenDecoder {
201201
choice: 0x400,
202202
choice2: 0x400,
203-
low_coder: vec![BitTree::new(3); 16],
204-
mid_coder: vec![BitTree::new(3); 16],
203+
low_coder: [
204+
BitTree::new(3),
205+
BitTree::new(3),
206+
BitTree::new(3),
207+
BitTree::new(3),
208+
BitTree::new(3),
209+
BitTree::new(3),
210+
BitTree::new(3),
211+
BitTree::new(3),
212+
BitTree::new(3),
213+
BitTree::new(3),
214+
BitTree::new(3),
215+
BitTree::new(3),
216+
BitTree::new(3),
217+
BitTree::new(3),
218+
BitTree::new(3),
219+
BitTree::new(3),
220+
],
221+
mid_coder: [
222+
BitTree::new(3),
223+
BitTree::new(3),
224+
BitTree::new(3),
225+
BitTree::new(3),
226+
BitTree::new(3),
227+
BitTree::new(3),
228+
BitTree::new(3),
229+
BitTree::new(3),
230+
BitTree::new(3),
231+
BitTree::new(3),
232+
BitTree::new(3),
233+
BitTree::new(3),
234+
BitTree::new(3),
235+
BitTree::new(3),
236+
BitTree::new(3),
237+
BitTree::new(3),
238+
],
205239
high_coder: BitTree::new(8),
206240
}
207241
}

src/decode/util.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use std::hash;
2-
use std::io;
1+
use std::{hash, io};
32

43
pub fn read_tag<R: io::BufRead>(input: &mut R, tag: &[u8]) -> io::Result<bool> {
54
let mut buf = vec![0; tag.len()];

src/encode/util.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use std::hash;
2-
use std::io;
1+
use std::{hash, io};
32

43
// A Write computing a digest on the bytes written.
54
pub struct HasherWrite<'a, W, H>

src/encode/xz.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use crate::decode;
2-
use crate::encode::lzma2;
3-
use crate::encode::util;
2+
use crate::encode::{lzma2, util};
43
use crate::xz::{footer, header, CheckMethod, StreamFlags};
54
use byteorder::{LittleEndian, WriteBytesExt};
65
use crc::{crc32, Hasher32};

src/error.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
//! Error handling.
22
33
use std::fmt::Display;
4-
use std::io;
5-
use std::result;
4+
use std::{io, result};
65

76
/// Library errors.
87
#[derive(Debug)]

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mod encode;
1212

1313
pub mod error;
1414

15+
mod util;
1516
mod xz;
1617

1718
use std::io;

src/util/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub mod vec2d;

0 commit comments

Comments
 (0)