Chunks iterator #214

Merged: 9 commits, merged Jun 9, 2023

Changes from all commits
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -9,11 +9,13 @@
- Support field renaming via `#[hdf5(rename = "new_name")]` helper attribute.
- Add a `ByteReader` which implements `std::io::{Read, Seek}` for 1D `u8`
datasets. Usage via `Dataset::as_byte_reader()`.
- Add `chunks_visit` to visit all chunks in a dataset.

### Changed

- The `H5Type` derive macro now uses `proc-macro-error` to emit error messages.
- MSRV is now `1.64.0` and the Rust edition has been bumped to 2021.
- Types in `ChunkInfo` have been changed to match the underlying HDF5 types (`hsize_t`, `haddr_t`).

### Fixed

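To give reviewers a feel for the `chunks_visit` entry above, here is a minimal usage sketch (the file name `chunks_demo.h5` and dataset name `data` are made up, the crate's `1.14.0` feature must be enabled, and `ndarray` is used for the write just as in the PR's test):

```rust
use hdf5::dataset::ChunkInfoRef;
use hdf5::Result;

fn main() -> Result<()> {
    let file = hdf5::File::create("chunks_demo.h5")?;
    let ds = file.new_dataset::<i16>().shape([3, 2]).chunk([1, 1]).create("data")?;
    ds.write(&ndarray::arr2(&[[1, 2], [3, 4], [5, 6]]))?;

    // Visit every chunk; returning 0 from the callback continues the iteration.
    ds.chunks_visit(|c: ChunkInfoRef| {
        println!("offset={:?} addr={} size={}", c.offset, c.addr, c.size);
        0
    })?;
    Ok(())
}
```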
1 change: 1 addition & 0 deletions hdf5/src/hl.rs
@@ -1,4 +1,5 @@
pub mod attribute;
pub mod chunks;
pub mod container;
pub mod dataset;
pub mod dataspace;
183 changes: 183 additions & 0 deletions hdf5/src/hl/chunks.rs
@@ -0,0 +1,183 @@
use crate::internal_prelude::*;

#[cfg(feature = "1.10.5")]
use hdf5_sys::h5d::{H5Dget_chunk_info, H5Dget_num_chunks};

#[cfg(feature = "1.10.5")]
#[derive(Clone, Debug, PartialEq, Eq)]
/// Information on a chunk in a `Dataset`.
pub struct ChunkInfo {
/// Array with a size equal to the dataset’s rank whose elements contain 0-based
/// logical positions of the chunk’s first element in each dimension.
pub offset: Vec<hsize_t>,
/// Filter mask that indicates which filters were used with the chunk when written.
///
/// A zero value indicates that all enabled filters are applied on the chunk.
/// A filter is skipped if the bit corresponding to the filter’s position in
/// the pipeline (0 ≤ position < 32) is turned on.
pub filter_mask: u32,
/// Chunk address in the file.
pub addr: haddr_t,
/// Chunk size in bytes.
pub size: hsize_t,
}

#[cfg(feature = "1.10.5")]
impl ChunkInfo {
pub(crate) fn new(ndim: usize) -> Self {
let offset = vec![0; ndim];
Self { offset, filter_mask: 0, addr: 0, size: 0 }
}

/// Returns positional indices of disabled filters.
pub fn disabled_filters(&self) -> Vec<usize> {
(0..32).filter(|i| self.filter_mask & (1 << i) != 0).collect()
}
}

#[cfg(feature = "1.10.5")]
pub(crate) fn chunk_info(ds: &Dataset, index: usize) -> Option<ChunkInfo> {
if !ds.is_chunked() {
return None;
}
h5lock!(ds.space().map_or(None, |s| {
let mut chunk_info = ChunkInfo::new(ds.ndim());
h5check(H5Dget_chunk_info(
ds.id(),
s.id(),
index as _,
chunk_info.offset.as_mut_ptr(),
&mut chunk_info.filter_mask,
&mut chunk_info.addr,
&mut chunk_info.size,
))
.map(|_| chunk_info)
.ok()
}))
}

#[cfg(feature = "1.10.5")]
pub(crate) fn get_num_chunks(ds: &Dataset) -> Option<usize> {
if !ds.is_chunked() {
return None;
}
h5lock!(ds.space().map_or(None, |s| {
let mut n: hsize_t = 0;
h5check(H5Dget_num_chunks(ds.id(), s.id(), &mut n)).map(|_| n as _).ok()
}))
}

#[cfg(feature = "1.14.0")]
mod v1_14_0 {
use super::*;
use hdf5_sys::h5d::H5Dchunk_iter;

/// Borrowed version of [ChunkInfo](crate::dataset::ChunkInfo)
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ChunkInfoRef<'a> {
pub offset: &'a [hsize_t],
pub filter_mask: u32,
pub addr: haddr_t,
pub size: hsize_t,
}

impl<'a> ChunkInfoRef<'a> {
/// Returns positional indices of disabled filters.
pub fn disabled_filters(&self) -> Vec<usize> {
(0..32).filter(|i| self.filter_mask & (1 << i) != 0).collect()
}
}

impl<'a> From<ChunkInfoRef<'a>> for ChunkInfo {
fn from(val: ChunkInfoRef<'a>) -> Self {
Self {
offset: val.offset.to_owned(),
filter_mask: val.filter_mask,
addr: val.addr,
size: val.size,
}
}
}

#[repr(C)]
struct RustCallback<F> {
pub ndims: hsize_t,
pub callback: F,
}

extern "C" fn chunks_callback<F>(
offset: *const hsize_t, filter_mask: c_uint, addr: haddr_t, size: hsize_t,
op_data: *mut c_void,
) -> herr_t
where
F: FnMut(ChunkInfoRef) -> i32,
{
unsafe {
std::panic::catch_unwind(|| {
let data: *mut RustCallback<F> = op_data.cast::<RustCallback<F>>();
let ndims = (*data).ndims;
let callback = &mut (*data).callback;

let offset = std::slice::from_raw_parts(offset, ndims as usize);

let info = ChunkInfoRef { offset, filter_mask, addr, size };

callback(info)
})
.unwrap_or(-1)
}
}

pub(crate) fn visit<F>(ds: &Dataset, callback: F) -> Result<()>
where
F: for<'a> FnMut(ChunkInfoRef<'a>) -> i32,
{
let mut data = RustCallback::<F> { ndims: ds.ndim() as _, callback };

h5try!(H5Dchunk_iter(
ds.id(),
H5P_DEFAULT,
Some(chunks_callback::<F>),
std::ptr::addr_of_mut!(data).cast()
));

Ok(())
}

#[cfg(test)]
mod test {
use super::*;

#[test]
fn chunks_visit() {
with_tmp_file(|f| {
let ds = f.new_dataset::<i16>().no_chunk().shape((4, 4)).create("nochunk").unwrap();
assert_err_re!(visit(&ds, |_| 0), "not a chunked dataset");

let ds =
f.new_dataset::<i16>().shape([3, 2]).chunk([1, 1]).create("chunk").unwrap();
ds.write(&ndarray::arr2(&[[1, 2], [3, 4], [5, 6]])).unwrap();

let mut i = 0;
let f = |c: ChunkInfoRef| {
match i {
0 => assert_eq!(c.offset, [0, 0]),
1 => assert_eq!(c.offset, [0, 1]),
2 => assert_eq!(c.offset, [1, 0]),
3 => assert_eq!(c.offset, [1, 1]),
4 => assert_eq!(c.offset, [2, 0]),
5 => assert_eq!(c.offset, [2, 1]),
_ => unreachable!(),
}
assert_eq!(c.size, std::mem::size_of::<i16>() as u64);
i += 1;
0
};
visit(&ds, f).unwrap();
assert_eq!(i, 6);
})
}
}
}
#[cfg(feature = "1.14.0")]
pub use v1_14_0::*;
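For comparison with the `H5Dchunk_iter`-based visitor above, the index-based `1.10.5` API defined in this module can be driven through the public `Dataset` methods like this (a sketch; `print_chunk_layout` is a hypothetical helper):

```rust
// Index-based chunk queries (feature "1.10.5"); `num_chunks` and `chunk_info`
// both return `None` for non-chunked datasets.
fn print_chunk_layout(ds: &hdf5::Dataset) {
    if let Some(n) = ds.num_chunks() {
        for i in 0..n {
            if let Some(info) = ds.chunk_info(i) {
                println!(
                    "chunk {i}: offset={:?} addr={} size={} disabled filters={:?}",
                    info.offset,
                    info.addr,
                    info.size,
                    info.disabled_filters()
                );
            }
        }
    }
}
```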
69 changes: 12 additions & 57 deletions hdf5/src/hl/dataset.rs
@@ -8,8 +8,6 @@ use hdf5_sys::h5d::{
H5Dcreate2, H5Dcreate_anon, H5Dget_access_plist, H5Dget_create_plist, H5Dget_offset,
H5Dset_extent,
};
#[cfg(feature = "1.10.5")]
use hdf5_sys::h5d::{H5Dget_chunk_info, H5Dget_num_chunks};
use hdf5_sys::h5l::H5Ldelete;
use hdf5_sys::h5p::H5P_DEFAULT;
use hdf5_sys::h5z::H5Z_filter_t;
@@ -66,36 +64,6 @@ impl Deref for Dataset {
}
}

#[cfg(feature = "1.10.5")]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ChunkInfo {
/// Array with a size equal to the dataset’s rank whose elements contain 0-based
/// logical positions of the chunk’s first element in each dimension.
pub offset: Vec<u64>,
/// Filter mask that indicates which filters were used with the chunk when written.
/// A zero value indicates that all enabled filters are applied on the chunk.
/// A filter is skipped if the bit corresponding to the filter’s position in
/// the pipeline (0 ≤ position < 32) is turned on.
pub filter_mask: u32,
/// Chunk address in the file.
pub addr: u64,
/// Chunk size in bytes.
pub size: u64,
}

#[cfg(feature = "1.10.5")]
impl ChunkInfo {
pub(crate) fn new(ndim: usize) -> Self {
let offset = vec![0; ndim];
Self { offset, filter_mask: 0, addr: 0, size: 0 }
}

/// Returns positional indices of disabled filters.
pub fn disabled_filters(&self) -> Vec<usize> {
(0..32).filter(|i| self.filter_mask & (1 << i) != 0).collect()
}
}

impl Dataset {
/// Returns a copy of the dataset access property list.
pub fn access_plist(&self) -> Result<DatasetAccess> {
@@ -135,42 +103,29 @@ impl Dataset {
#[cfg(feature = "1.10.5")]
/// Returns the number of chunks if the dataset is chunked.
pub fn num_chunks(&self) -> Option<usize> {
if !self.is_chunked() {
return None;
}
h5lock!(self.space().map_or(None, |s| {
let mut n: hsize_t = 0;
h5check(H5Dget_num_chunks(self.id(), s.id(), &mut n)).map(|_| n as _).ok()
}))
crate::hl::chunks::get_num_chunks(self)
}

#[cfg(feature = "1.10.5")]
/// Retrieves the chunk information for the chunk specified by its index.
pub fn chunk_info(&self, index: usize) -> Option<ChunkInfo> {
if !self.is_chunked() {
return None;
}
h5lock!(self.space().map_or(None, |s| {
let mut chunk_info = ChunkInfo::new(self.ndim());
h5check(H5Dget_chunk_info(
self.id(),
s.id(),
index as _,
chunk_info.offset.as_mut_ptr(),
&mut chunk_info.filter_mask,
&mut chunk_info.addr,
&mut chunk_info.size,
))
.map(|_| chunk_info)
.ok()
}))
pub fn chunk_info(&self, index: usize) -> Option<crate::dataset::ChunkInfo> {
crate::hl::chunks::chunk_info(self, index)
}

/// Returns the chunk shape if the dataset is chunked.
pub fn chunk(&self) -> Option<Vec<Ix>> {
self.dcpl().map_or(None, |pl| pl.chunk())
}

/// Visits all chunks in the dataset, calling `callback` once per chunk; a non-zero return value stops the iteration early.
#[cfg(feature = "1.14.0")]
pub fn chunks_visit<F>(&self, callback: F) -> Result<()>
where
F: for<'a> FnMut(crate::dataset::ChunkInfoRef<'a>) -> i32,
{
crate::hl::chunks::visit(self, callback)
}

/// Returns the absolute byte offset of the dataset in the file if such offset is defined
/// (which is not the case for datasets that are chunked, compact or not allocated yet).
pub fn offset(&self) -> Option<u64> {
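One way downstream code might combine the new method with the `From<ChunkInfoRef<'_>> for ChunkInfo` impl from `chunks.rs` to keep owned copies of every record (a sketch; `collect_chunks` is a hypothetical helper, feature `1.14.0`):

```rust
use hdf5::dataset::{ChunkInfo, ChunkInfoRef};
use hdf5::{Dataset, Result};

// Collects an owned record for every chunk; a non-zero return value from the
// callback would stop the iteration early.
fn collect_chunks(ds: &Dataset) -> Result<Vec<ChunkInfo>> {
    let mut chunks = Vec::new();
    ds.chunks_visit(|c: ChunkInfoRef| {
        chunks.push(c.into());
        0
    })?;
    Ok(chunks)
}
```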
4 changes: 3 additions & 1 deletion hdf5/src/lib.rs
@@ -78,7 +78,9 @@ mod export {

pub mod dataset {
#[cfg(feature = "1.10.5")]
pub use crate::hl::dataset::ChunkInfo;
pub use crate::hl::chunks::ChunkInfo;
#[cfg(feature = "1.14.0")]
pub use crate::hl::chunks::ChunkInfoRef;
pub use crate::hl::dataset::{Chunk, Dataset, DatasetBuilder};
pub use crate::hl::plist::dataset_access::*;
pub use crate::hl::plist::dataset_create::*;