diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a94195c..d1d3db2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,11 +9,13 @@ - Support field renaming via `#[hdf5(rename = "new_name")]` helper attribute. - Add a `ByteReader` which implements `std::io::{Read, Seek}` for 1D `u8` datasets. Usage via `Dataset::as_byte_reader()`. +- Add `Dataset::chunks_visit` to visit all chunks in a dataset. ### Changed - The `H5Type` derive macro now uses `proc-macro-error` to emit error messages. - MSRV is now `1.64.0` and Rust edition has now been bumped to 2021. +- Types in `ChunkInfo` have been changed to match HDF5. ### Fixed diff --git a/hdf5/src/hl.rs b/hdf5/src/hl.rs index 5c508177..ac813036 100644 --- a/hdf5/src/hl.rs +++ b/hdf5/src/hl.rs @@ -1,4 +1,5 @@ pub mod attribute; +pub mod chunks; pub mod container; pub mod dataset; pub mod dataspace; diff --git a/hdf5/src/hl/chunks.rs b/hdf5/src/hl/chunks.rs new file mode 100644 index 00000000..1de68115 --- /dev/null +++ b/hdf5/src/hl/chunks.rs @@ -0,0 +1,183 @@ +use crate::internal_prelude::*; + +#[cfg(feature = "1.10.5")] +use hdf5_sys::h5d::{H5Dget_chunk_info, H5Dget_num_chunks}; + +#[cfg(feature = "1.10.5")] +#[derive(Clone, Debug, PartialEq, Eq)] +/// Information on a chunk in a Dataset +pub struct ChunkInfo { + /// Array with a size equal to the dataset’s rank whose elements contain 0-based + /// logical positions of the chunk’s first element in each dimension. + pub offset: Vec, + /// Filter mask that indicates which filters were used with the chunk when written. + /// + /// A zero value indicates that all enabled filters are applied on the chunk. + /// A filter is skipped if the bit corresponding to the filter’s position in + /// the pipeline (0 ≤ position < 32) is turned on. + pub filter_mask: u32, + /// Chunk address in the file. + pub addr: haddr_t, + /// Chunk size in bytes. 
+ pub size: hsize_t, +} + +#[cfg(feature = "1.10.5")] +impl ChunkInfo { + pub(crate) fn new(ndim: usize) -> Self { + let offset = vec![0; ndim]; + Self { offset, filter_mask: 0, addr: 0, size: 0 } + } + + /// Returns positional indices of disabled filters. + pub fn disabled_filters(&self) -> Vec { + (0..32).filter(|i| self.filter_mask & (1 << i) != 0).collect() + } +} + +#[cfg(feature = "1.10.5")] +pub(crate) fn chunk_info(ds: &Dataset, index: usize) -> Option { + if !ds.is_chunked() { + return None; + } + h5lock!(ds.space().map_or(None, |s| { + let mut chunk_info = ChunkInfo::new(ds.ndim()); + h5check(H5Dget_chunk_info( + ds.id(), + s.id(), + index as _, + chunk_info.offset.as_mut_ptr(), + &mut chunk_info.filter_mask, + &mut chunk_info.addr, + &mut chunk_info.size, + )) + .map(|_| chunk_info) + .ok() + })) +} + +#[cfg(feature = "1.10.5")] +pub(crate) fn get_num_chunks(ds: &Dataset) -> Option { + if !ds.is_chunked() { + return None; + } + h5lock!(ds.space().map_or(None, |s| { + let mut n: hsize_t = 0; + h5check(H5Dget_num_chunks(ds.id(), s.id(), &mut n)).map(|_| n as _).ok() + })) +} + +#[cfg(feature = "1.14.0")] +mod v1_14_0 { + use super::*; + use hdf5_sys::h5d::H5Dchunk_iter; + + /// Borrowed version of [ChunkInfo](crate::dataset::ChunkInfo) + #[derive(Clone, Debug, PartialEq, Eq)] + pub struct ChunkInfoRef<'a> { + pub offset: &'a [hsize_t], + pub filter_mask: u32, + pub addr: haddr_t, + pub size: hsize_t, + } + + impl<'a> ChunkInfoRef<'a> { + /// Returns positional indices of disabled filters. 
+ pub fn disabled_filters(&self) -> Vec { + (0..32).filter(|i| self.filter_mask & (1 << i) != 0).collect() + } + } + + impl<'a> From> for ChunkInfo { + fn from(val: ChunkInfoRef<'a>) -> Self { + Self { + offset: val.offset.to_owned(), + filter_mask: val.filter_mask, + addr: val.addr, + size: val.size, + } + } + } + + #[repr(C)] + struct RustCallback { + pub ndims: hsize_t, + pub callback: F, + } + + extern "C" fn chunks_callback( + offset: *const hsize_t, filter_mask: c_uint, addr: haddr_t, size: hsize_t, + op_data: *mut c_void, + ) -> herr_t + where + F: FnMut(ChunkInfoRef) -> i32, + { + unsafe { + std::panic::catch_unwind(|| { + let data: *mut RustCallback = op_data.cast::>(); + let ndims = (*data).ndims; + let callback = &mut (*data).callback; + + let offset = std::slice::from_raw_parts(offset, ndims as usize); + + let info = ChunkInfoRef { offset, filter_mask, addr, size }; + + callback(info) + }) + .unwrap_or(-1) + } + } + + pub(crate) fn visit(ds: &Dataset, callback: F) -> Result<()> + where + F: for<'a> FnMut(ChunkInfoRef<'a>) -> i32, + { + let mut data = RustCallback:: { ndims: ds.ndim() as _, callback }; + + h5try!(H5Dchunk_iter( + ds.id(), + H5P_DEFAULT, + Some(chunks_callback::), + std::ptr::addr_of_mut!(data).cast() + )); + + Ok(()) + } + + #[cfg(test)] + mod test { + use super::*; + + #[test] + fn chunks_visit() { + with_tmp_file(|f| { + let ds = f.new_dataset::().no_chunk().shape((4, 4)).create("nochunk").unwrap(); + assert_err_re!(visit(&ds, |_| 0), "not a chunked dataset"); + + let ds = + f.new_dataset::().shape([3, 2]).chunk([1, 1]).create("chunk").unwrap(); + ds.write(&ndarray::arr2(&[[1, 2], [3, 4], [5, 6]])).unwrap(); + + let mut i = 0; + let f = |c: ChunkInfoRef| { + match i { + 0 => assert_eq!(c.offset, [0, 0]), + 1 => assert_eq!(c.offset, [0, 1]), + 2 => assert_eq!(c.offset, [1, 0]), + 3 => assert_eq!(c.offset, [1, 1]), + 4 => assert_eq!(c.offset, [2, 0]), + 5 => assert_eq!(c.offset, [2, 1]), + _ => unreachable!(), + } + assert_eq!(c.size, 
std::mem::size_of::() as u64); + i += 1; + 0 + }; + visit(&ds, f).unwrap(); + assert_eq!(i, 6); + }) + } + } +} +#[cfg(feature = "1.14.0")] +pub use v1_14_0::*; diff --git a/hdf5/src/hl/dataset.rs b/hdf5/src/hl/dataset.rs index d0cc1dc0..f8e9ee64 100644 --- a/hdf5/src/hl/dataset.rs +++ b/hdf5/src/hl/dataset.rs @@ -8,8 +8,6 @@ use hdf5_sys::h5d::{ H5Dcreate2, H5Dcreate_anon, H5Dget_access_plist, H5Dget_create_plist, H5Dget_offset, H5Dset_extent, }; -#[cfg(feature = "1.10.5")] -use hdf5_sys::h5d::{H5Dget_chunk_info, H5Dget_num_chunks}; use hdf5_sys::h5l::H5Ldelete; use hdf5_sys::h5p::H5P_DEFAULT; use hdf5_sys::h5z::H5Z_filter_t; @@ -66,36 +64,6 @@ impl Deref for Dataset { } } -#[cfg(feature = "1.10.5")] -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct ChunkInfo { - /// Array with a size equal to the dataset’s rank whose elements contain 0-based - /// logical positions of the chunk’s first element in each dimension. - pub offset: Vec, - /// Filter mask that indicates which filters were used with the chunk when written. - /// A zero value indicates that all enabled filters are applied on the chunk. - /// A filter is skipped if the bit corresponding to the filter’s position in - /// the pipeline (0 ≤ position < 32) is turned on. - pub filter_mask: u32, - /// Chunk address in the file. - pub addr: u64, - /// Chunk size in bytes. - pub size: u64, -} - -#[cfg(feature = "1.10.5")] -impl ChunkInfo { - pub(crate) fn new(ndim: usize) -> Self { - let offset = vec![0; ndim]; - Self { offset, filter_mask: 0, addr: 0, size: 0 } - } - - /// Returns positional indices of disabled filters. - pub fn disabled_filters(&self) -> Vec { - (0..32).filter(|i| self.filter_mask & (1 << i) != 0).collect() - } -} - impl Dataset { /// Returns a copy of the dataset access property list. pub fn access_plist(&self) -> Result { @@ -135,35 +103,13 @@ impl Dataset { #[cfg(feature = "1.10.5")] /// Returns the number of chunks if the dataset is chunked. 
pub fn num_chunks(&self) -> Option { - if !self.is_chunked() { - return None; - } - h5lock!(self.space().map_or(None, |s| { - let mut n: hsize_t = 0; - h5check(H5Dget_num_chunks(self.id(), s.id(), &mut n)).map(|_| n as _).ok() - })) + crate::hl::chunks::get_num_chunks(self) } #[cfg(feature = "1.10.5")] /// Retrieves the chunk information for the chunk specified by its index. - pub fn chunk_info(&self, index: usize) -> Option { - if !self.is_chunked() { - return None; - } - h5lock!(self.space().map_or(None, |s| { - let mut chunk_info = ChunkInfo::new(self.ndim()); - h5check(H5Dget_chunk_info( - self.id(), - s.id(), - index as _, - chunk_info.offset.as_mut_ptr(), - &mut chunk_info.filter_mask, - &mut chunk_info.addr, - &mut chunk_info.size, - )) - .map(|_| chunk_info) - .ok() - })) + pub fn chunk_info(&self, index: usize) -> Option { + crate::hl::chunks::chunk_info(self, index) } /// Returns the chunk shape if the dataset is chunked. @@ -171,6 +117,15 @@ impl Dataset { self.dcpl().map_or(None, |pl| pl.chunk()) } + /// Visit all chunks + #[cfg(feature = "1.14.0")] + pub fn chunks_visit(&self, callback: F) -> Result<()> + where + F: for<'a> FnMut(crate::dataset::ChunkInfoRef<'a>) -> i32, + { + crate::hl::chunks::visit(self, callback) + } + /// Returns the absolute byte offset of the dataset in the file if such offset is defined /// (which is not the case for datasets that are chunked, compact or not allocated yet). pub fn offset(&self) -> Option { diff --git a/hdf5/src/lib.rs b/hdf5/src/lib.rs index 587b0539..5f6c3411 100644 --- a/hdf5/src/lib.rs +++ b/hdf5/src/lib.rs @@ -78,7 +78,9 @@ mod export { pub mod dataset { #[cfg(feature = "1.10.5")] - pub use crate::hl::dataset::ChunkInfo; + pub use crate::hl::chunks::ChunkInfo; + #[cfg(feature = "1.14.0")] + pub use crate::hl::chunks::ChunkInfoRef; pub use crate::hl::dataset::{Chunk, Dataset, DatasetBuilder}; pub use crate::hl::plist::dataset_access::*; pub use crate::hl::plist::dataset_create::*;