From 208d74b88acfca4f969c2d669684e6da6c6c7c0b Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Thu, 27 Apr 2023 13:47:16 -0600 Subject: [PATCH 01/28] add support for multiple recordings in archives --- sigmf/archive.py | 103 +++++++++++++++++++---------------- sigmf/archivereader.py | 105 +++++++++++++++++++----------------- sigmf/sigmffile.py | 44 ++++++++++++--- tests/conftest.py | 38 +++++++++---- tests/test_archive.py | 63 +++++++++++++++------- tests/test_archivereader.py | 22 +++++++- tests/test_sigmffile.py | 22 ++++++-- tests/test_validation.py | 10 ++-- tests/testdata.py | 17 +++++- 9 files changed, 282 insertions(+), 142 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index de6bd50..bd206bd 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -10,6 +10,8 @@ import shutil import tarfile import tempfile +from typing import BinaryIO, Iterable + from .error import SigMFFileError @@ -21,7 +23,7 @@ class SigMFArchive(): - """Archive a SigMFFile. + """Archive one or more `SigMFFile`s. A `.sigmf` file must include both valid metadata and data. If `self.data_file` is not set or the requested output file @@ -29,52 +31,41 @@ class SigMFArchive(): Parameters: - sigmffile -- A SigMFFile object with valid metadata and data_file + sigmffile -- An iterable of SigMFFile objects with valid metadata and data_files name -- path to archive file to create. If file exists, overwrite. - If `name` doesn't end in .sigmf, it will be appended. - For example: if `name` == "/tmp/archive1", then the - following archive will be created: - /tmp/archive1.sigmf - - archive1/ - - archive1.sigmf-meta - - archive1.sigmf-data + If `name` doesn't end in .sigmf, it will be appended. The + `self.path` instance variable will be updated upon + successful writing of the archive to point to the final + archive path. + fileobj -- If `fileobj` is specified, it is used as an alternative to - a file object opened in binary mode for `name`. It is - supposed to be at position 0. `name` is not required, but - if specified will be used to determine the directory and - file names within the archive. `fileobj` won't be closed. - For example: if `name` == "archive1" and fileobj is given, - a tar archive will be written to fileobj with the - following structure: - - archive1/ - - archive1.sigmf-meta - - archive1.sigmf-data + a file object opened in binary mode for `name`. If + `fileobj` is an open tarfile, it will be appended to. It is + supposed to be at position 0. `fileobj` won't be closed. If + `fileobj` is given, `name` has no effect. """ - def __init__(self, sigmffile, name=None, fileobj=None): - self.sigmffile = sigmffile + def __init__(self, sigmffiles : Iterable["SigMFFile"], name : str = None, fileobj : BinaryIO =None): + self.sigmffiles = sigmffiles + self.name = name self.fileobj = fileobj self._check_input() - archive_name = self._get_archive_name() + mode = "a" if fileobj is not None else "w" sigmf_fileobj = self._get_output_fileobj() - sigmf_archive = tarfile.TarFile(mode="w", - fileobj=sigmf_fileobj, - format=tarfile.PAX_FORMAT) - tmpdir = tempfile.mkdtemp() - sigmf_md_filename = archive_name + SIGMF_METADATA_EXT - sigmf_md_path = os.path.join(tmpdir, sigmf_md_filename) - sigmf_data_filename = archive_name + SIGMF_DATASET_EXT - sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename) - - with open(sigmf_md_path, "w") as mdfile: - self.sigmffile.dump(mdfile, pretty=True) - - shutil.copy(self.sigmffile.data_file, sigmf_data_path) - + try: + sigmf_archive = tarfile.TarFile(mode=mode, + fileobj=sigmf_fileobj, + format=tarfile.PAX_FORMAT) + except tarfile.ReadError: + # fileobj doesn't contain any archives yet, so reopen in 'w' mode + sigmf_archive = tarfile.TarFile(mode='w', + fileobj=sigmf_fileobj, + format=tarfile.PAX_FORMAT) + def chmod(tarinfo): if tarinfo.isdir(): tarinfo.mode = 0o755 # dwrxw-rw-r @@ -82,19 +73,33 @@ def chmod(tarinfo): tarinfo.mode = 0o644 # -wr-r--r-- return tarinfo - sigmf_archive.add(tmpdir, arcname=archive_name, filter=chmod) + for sigmffile in self.sigmffiles: + with tempfile.TemporaryDirectory() as tmpdir: + sigmf_md_filename = sigmffile.name + SIGMF_METADATA_EXT + sigmf_md_path = os.path.join(tmpdir, sigmf_md_filename) + sigmf_data_filename = sigmffile.name + SIGMF_DATASET_EXT + sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename) + + with open(sigmf_md_path, "w") as mdfile: + sigmffile.dump(mdfile, pretty=True) + + shutil.copy(sigmffile.data_file, sigmf_data_path) + sigmf_archive.add(tmpdir, arcname=sigmffile.name, filter=chmod) + sigmf_archive.close() if not fileobj: sigmf_fileobj.close() - - shutil.rmtree(tmpdir) + else: + sigmf_fileobj.seek(0) # ensure next open can read this as a tar self.path = sigmf_archive.name def _check_input(self): self._ensure_name_has_correct_extension() - self._ensure_data_file_set() - self._validate_sigmffile_metadata() + for sigmffile in self.sigmffiles: + self._ensure_sigmffile_name_set(sigmffile) + self._ensure_data_file_set(sigmffile) + self._validate_sigmffile_metadata(sigmffile) def _ensure_name_has_correct_extension(self): name = self.name @@ -110,13 +115,21 @@ def _ensure_name_has_correct_extension(self): self.name = name if has_correct_extension else name + SIGMF_ARCHIVE_EXT - def _ensure_data_file_set(self): - if not self.sigmffile.data_file: + @staticmethod + def _ensure_sigmffile_name_set(sigmffile): + if not sigmffile.name: + err = "the `name` attribute must be set to pass to `SigMFArchive`" + raise SigMFFileError(err) + + @staticmethod + def _ensure_data_file_set(sigmffile): + if not sigmffile.data_file: err = "no data file - use `set_data_file`" raise SigMFFileError(err) - def _validate_sigmffile_metadata(self): - self.sigmffile.validate() + @staticmethod + def _validate_sigmffile_metadata(sigmffile): + sigmffile.validate() def _get_archive_name(self): if self.fileobj and not self.name: diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index 5759b74..09765fa 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -28,65 +28,74 @@ class SigMFArchiveReader(): """ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None): self.name = name - if self.name is not None: - if not name.endswith(SIGMF_ARCHIVE_EXT): - err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT) - raise SigMFFileError(err) + try: + if self.name is not None: + if not name.endswith(SIGMF_ARCHIVE_EXT): + err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT) + raise SigMFFileError(err) - tar_obj = tarfile.open(self.name) + tar_obj = tarfile.open(self.name) - elif archive_buffer is not None: - tar_obj = tarfile.open(fileobj=archive_buffer, mode='r:') + elif archive_buffer is not None: + tar_obj = tarfile.open(fileobj=archive_buffer, mode='r:') - else: - raise ValueError('In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None') - - json_contents = None - data_offset_size = None - - for memb in tar_obj.getmembers(): - if memb.isdir(): # memb.type == tarfile.DIRTYPE: - # the directory structure will be reflected in the member name - continue - - elif memb.isfile(): # memb.type == tarfile.REGTYPE: - if memb.name.endswith(SIGMF_METADATA_EXT): - json_contents = memb.name - if data_offset_size is None: - # consider a warnings.warn() here; the datafile should be earlier in the - # archive than the metadata, so that updating it (like, adding an annotation) - # is fast. - pass - with tar_obj.extractfile(memb) as memb_fid: - json_contents = memb_fid.read() - - elif memb.name.endswith(SIGMF_DATASET_EXT): - data_offset_size = memb.offset_data, memb.size - - else: - print('A regular file', memb.name, 'was found but ignored in the archive') else: - print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.') - - if data_offset_size is None: - raise SigMFFileError('No .sigmf-data file found in archive!') + raise ValueError('In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None') + + json_contents = None + data_offset_size = None + self.sigmffiles = [] + data_found = False + + for memb in tar_obj.getmembers(): + if memb.isdir(): # memb.type == tarfile.DIRTYPE: + # the directory structure will be reflected in the member name + continue + + elif memb.isfile(): # memb.type == tarfile.REGTYPE: + if memb.name.endswith(SIGMF_METADATA_EXT): + json_contents = memb.name + if data_offset_size is None: + # consider a warnings.warn() here; the datafile should be earlier in the + # archive than the metadata, so that updating it (like, adding an annotation) + # is fast. + pass + with tar_obj.extractfile(memb) as memb_fid: + json_contents = memb_fid.read() + + elif memb.name.endswith(SIGMF_DATASET_EXT): + data_offset_size = memb.offset_data, memb.size + data_found = True + + else: + print('A regular file', memb.name, 'was found but ignored in the archive') + else: + print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.') - self.sigmffile = SigMFFile(metadata=json_contents) - valid_md = self.sigmffile.validate() + if data_offset_size is not None and json_contents is not None: + sigmffile = SigMFFile(metadata=json_contents) + valid_md = sigmffile.validate() - self.sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0], - size_bytes=data_offset_size[1], map_readonly=map_readonly) + sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0], + size_bytes=data_offset_size[1], map_readonly=map_readonly) - self.ndim = self.sigmffile.ndim - self.shape = self.sigmffile.shape + self.ndim = sigmffile.ndim + self.shape = sigmffile.shape + self.sigmffiles.append(sigmffile) + data_offset_size = None + json_contents = None + - tar_obj.close() + if not data_found: + raise SigMFFileError('No .sigmf-data file found in archive!') + finally: + tar_obj.close() def __len__(self): - return self.sigmffile.__len__() + return len(self.sigmffiles) def __iter__(self): - return self.sigmffile.__iter__() + return self.sigmffiles.__iter__() def __getitem__(self, sli): - return self.sigmffile.__getitem__(sli) + return self.sigmffiles.__getitem__(sli) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 4fff3e6..a196ed1 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -148,7 +148,7 @@ class SigMFFile(SigMFMetafile): ] VALID_KEYS = {GLOBAL_KEY: VALID_GLOBAL_KEYS, CAPTURE_KEY: VALID_CAPTURE_KEYS, ANNOTATION_KEY: VALID_ANNOTATION_KEYS} - def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True): + def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True, name=None): ''' API for SigMF I/O @@ -164,6 +164,13 @@ def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksu When True will skip calculating hash on data_file (if present) to check against metadata. map_readonly: bool, default True Indicates whether assignments on the numpy.memmap are allowed. + name: Name used for directory and filenames if archived. + For example, given `name=archive1`, then passing this + sigmffile to SigMFArchive will add the following files + to the archive: + - archive1/ + - archive1.sigmf-meta + - archive1.sigmf-data ''' super(SigMFFile, self).__init__() self.data_file = None @@ -183,6 +190,7 @@ def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksu self.set_global_info(global_info) if data_file is not None: self.set_data_file(data_file, skip_checksum, map_readonly=map_readonly) + self.name = name def __len__(self): return self._memmap.shape[0] @@ -212,6 +220,20 @@ def __getitem__(self, sli): else: raise ValueError("unhandled ndim in SigMFFile.__getitem__(); this shouldn't happen") return a + + def __eq__(self, other): + """Define equality between two `SigMFFile`s. + + Rely on the `core:sha512` value in the metadata to decide whether + `data_file` is the same since the same sigmf archive could be extracted + twice to two different temp directories and the SigMFFiles should still + be equivalent. + + """ + if isinstance(other, SigMFFile): + return self._metadata == other._metadata + + return False def _get_start_offset(self): """ @@ -511,13 +533,23 @@ def validate(self): version = self.get_global_field(self.VERSION_KEY) validate.validate(self._metadata, self.get_schema()) - def archive(self, name=None, fileobj=None): + def archive(self, sigmffile_name=None, archive_name=None, fileobj=None): """Dump contents to SigMF archive format. - `name` and `fileobj` are passed to SigMFArchive and are defined there. + `sigmffile_name` determines the directory and filenames inside the archive. If + not specified, you must have set the instance variable `self.name` + + `arhive_name` is passed to SigMFArchive `name` and `fileobj` is passed to + SigMFArchive `fileobj`. """ - archive = SigMFArchive(self, name, fileobj) + if sigmffile_name is not None: + self.name = sigmffile_name + + if archive_name is None: + archive_name = self.name + + archive = SigMFArchive([self], archive_name, fileobj) return archive.path def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): @@ -891,13 +923,13 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None): def fromarchive(archive_path, dir=None): - """Extract an archive and return a SigMFFile. + """Extract an archive and return containing SigMFFiles. The `dir` parameter is no longer used as this function has been changed to access SigMF archives without extracting them. """ from .archivereader import SigMFArchiveReader - return SigMFArchiveReader(archive_path).sigmffile + return SigMFArchiveReader(archive_path).sigmffiles def fromfile(filename, skip_checksum=False): diff --git a/tests/conftest.py b/tests/conftest.py index 9a8aa64..295998b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,22 +24,38 @@ from sigmf.sigmffile import SigMFFile -from .testdata import TEST_FLOAT32_DATA, TEST_METADATA +from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1, TEST_FLOAT32_DATA_2, TEST_METADATA_2 @pytest.fixture -def test_data_file(): +def test_data_file_1(): with tempfile.NamedTemporaryFile() as temp: - TEST_FLOAT32_DATA.tofile(temp.name) + TEST_FLOAT32_DATA_1.tofile(temp.name) yield temp +@pytest.yield_fixture +def test_data_file_2(): + with tempfile.NamedTemporaryFile() as t: + TEST_FLOAT32_DATA_2.tofile(t.name) + yield t + + +@pytest.fixture +def test_sigmffile(test_data_file_1): + f = SigMFFile(name='test1') + f.set_global_field("core:datatype", "rf32_le") + f.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA_1)) + f.add_capture(start_index=0) + f.set_data_file(test_data_file_1.name) + assert f._metadata == TEST_METADATA_1 + return f @pytest.fixture -def test_sigmffile(test_data_file): - sigf = SigMFFile() - sigf.set_global_field("core:datatype", "rf32_le") - sigf.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA)) - sigf.add_capture(start_index=0) - sigf.set_data_file(test_data_file.name) - assert sigf._metadata == TEST_METADATA - return sigf +def test_alternate_sigmffile(test_data_file_2): + f = SigMFFile(name='test2') + f.set_global_field("core:datatype", "rf32_le") + f.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA_2)) + f.add_capture(start_index=0) + f.set_data_file(test_data_file_2.name) + assert f._metadata == TEST_METADATA_2 + return f diff --git a/tests/test_archive.py b/tests/test_archive.py index 5c3d67b..7b08e9a 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -8,14 +8,14 @@ import pytest import jsonschema -from sigmf import error -from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT +from sigmf import error, sigmffile +from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SigMFArchive -from .testdata import TEST_FLOAT32_DATA, TEST_METADATA +from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1 -def create_test_archive(test_sigmffile, tmpfile): - sigmf_archive = test_sigmffile.archive(fileobj=tmpfile) +def create_test_archive(test_sigmffile, tmpfile, sigmffile_name="test"): + sigmf_archive = test_sigmffile.archive(sigmffile_name=sigmffile_name, fileobj=tmpfile) sigmf_tarfile = tarfile.open(sigmf_archive, mode="r", format=tarfile.PAX_FORMAT) return sigmf_tarfile @@ -24,20 +24,20 @@ def test_without_data_file_throws_fileerror(test_sigmffile): test_sigmffile.data_file = None with tempfile.NamedTemporaryFile() as temp: with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(name=temp.name) + test_sigmffile.archive(archive_name=temp.name) def test_invalid_md_throws_validationerror(test_sigmffile): del test_sigmffile._metadata["global"]["core:datatype"] # required field with tempfile.NamedTemporaryFile() as temp: with pytest.raises(jsonschema.exceptions.ValidationError): - test_sigmffile.archive(name=temp.name) + test_sigmffile.archive(archive_name=temp.name) def test_name_wrong_extension_throws_fileerror(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(name=temp.name + ".zip") + test_sigmffile.archive(archive_name=temp.name + ".zip") def test_fileobj_extension_ignored(test_sigmffile): @@ -47,17 +47,18 @@ def test_fileobj_extension_ignored(test_sigmffile): def test_name_used_in_fileobj(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: - sigmf_archive = test_sigmffile.archive(name="testarchive", fileobj=temp) + sigmf_archive = test_sigmffile.archive(archive_name="testarchive", fileobj=temp) sigmf_tarfile = tarfile.open(sigmf_archive, mode="r") basedir, file1, file2 = sigmf_tarfile.getmembers() - assert basedir.name == "testarchive" + assert basedir.name == test_sigmffile.name + assert sigmf_tarfile.name == temp.name def filename(tarinfo): path_root, _ = path.splitext(tarinfo.name) return path.split(path_root)[-1] - assert filename(file1) == "testarchive" - assert filename(file2) == "testarchive" + assert filename(file1) == test_sigmffile.name + assert filename(file2) == test_sigmffile.name def test_fileobj_not_closed(test_sigmffile): @@ -77,7 +78,7 @@ def test_unwritable_name_throws_fileerror(test_sigmffile): # so use invalid filename unwritable_file = '/bad_name/' with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(name=unwritable_file) + test_sigmffile.archive(archive_name=unwritable_file) def test_tarfile_layout(test_sigmffile): @@ -93,20 +94,44 @@ def test_tarfile_names_and_extensions(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) basedir, file1, file2 = sigmf_tarfile.getmembers() - archive_name = basedir.name - assert archive_name == path.split(temp.name)[-1] + sigmffile_name = basedir.name + assert sigmffile_name == test_sigmffile.name + archive_name = sigmf_tarfile.name + assert archive_name == temp.name + path.split(temp.name)[-1] file_extensions = {SIGMF_DATASET_EXT, SIGMF_METADATA_EXT} file1_name, file1_ext = path.splitext(file1.name) assert file1_ext in file_extensions - assert path.split(file1_name)[-1] == archive_name + assert path.split(file1_name)[-1] == test_sigmffile.name file_extensions.remove(file1_ext) file2_name, file2_ext = path.splitext(file2.name) - assert path.split(file2_name)[-1] == archive_name + assert path.split(file2_name)[-1] == test_sigmffile.name assert file2_ext in file_extensions +def test_sf_fromarchive_multirec(test_sigmffile, test_alternate_sigmffile): + """`SigMFFile.fromarchive` should return list of SigMFFiles.""" + with tempfile.NamedTemporaryFile(delete=True) as tf: + # Create a multi-recording archive + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, name=tf.name) + output_sigmf_files = sigmffile.fromarchive(archive_path=arch.path) + assert len(output_sigmf_files) == 2 + assert input_sigmffiles == output_sigmf_files + + + +def test_multirec_archive_into_fileobj(test_sigmffile): + with tempfile.NamedTemporaryFile() as t: + # add first sigmffile to the fileobj t + create_test_archive(test_sigmffile, t, sigmffile_name="test1") + # add a second one to the same fileobj + multirec_tar = create_test_archive(test_sigmffile, t, sigmffile_name="test2") + members = multirec_tar.getmembers() + assert len(members) == 6 # 2 directories and 2 files per directory + def test_tarfile_persmissions(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: @@ -130,14 +155,14 @@ def test_contents(test_sigmffile): bytestream_reader = codecs.getreader("utf-8") # bytes -> str mdfile_reader = bytestream_reader(sigmf_tarfile.extractfile(mdfile)) - assert json.load(mdfile_reader) == TEST_METADATA + assert json.load(mdfile_reader) == TEST_METADATA_1 datfile_reader = sigmf_tarfile.extractfile(datfile) # calling `fileno` on `tarfile.ExFileObject` throws error (?), but # np.fromfile requires it, so we need this extra step data = np.frombuffer(datfile_reader.read(), dtype=np.float32) - assert np.array_equal(data, TEST_FLOAT32_DATA) + assert np.array_equal(data, TEST_FLOAT32_DATA_1) def test_tarfile_type(test_sigmffile): diff --git a/tests/test_archivereader.py b/tests/test_archivereader.py index 2b5b449..03f940e 100644 --- a/tests/test_archivereader.py +++ b/tests/test_archivereader.py @@ -9,7 +9,7 @@ from sigmf import error from sigmf import SigMFFile, SigMFArchiveReader -from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT +from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SigMFArchive def test_access_data_without_untar(test_sigmffile): global_info = { @@ -52,3 +52,23 @@ def test_access_data_without_untar(test_sigmffile): meta.tofile(archive_filename, toarchive=True) archi = SigMFArchiveReader(archive_filename, skip_checksum=True) + +def test_extract_single_recording(test_sigmffile): + with tempfile.NamedTemporaryFile() as tf: + expected_sigmffile = test_sigmffile + arch = SigMFArchive([expected_sigmffile], name=tf.name) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 1 + actual_sigmffile = reader[0] + assert expected_sigmffile == actual_sigmffile + + +def test_extract_multi_recording(test_sigmffile, test_alternate_sigmffile): + with tempfile.NamedTemporaryFile() as tf: + # Create a multi-recording archive + expected_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(expected_sigmffiles, name=tf.name) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 2 + for expected in expected_sigmffiles: + assert expected in reader.sigmffiles \ No newline at end of file diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index e371964..6ec595b 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -35,8 +35,8 @@ class TestClassMethods(unittest.TestCase): def setUp(self): '''assure tests have a valid SigMF object to work with''' _, temp_path = tempfile.mkstemp() - TEST_FLOAT32_DATA.tofile(temp_path) - self.sigmf_object = SigMFFile(TEST_METADATA, data_file=temp_path) + TEST_FLOAT32_DATA_1.tofile(temp_path) + self.sigmf_object = SigMFFile(TEST_METADATA_1, data_file=temp_path) def test_iterator_basic(self): '''make sure default batch_size works''' @@ -84,13 +84,25 @@ def test_add_annotation(): sigf.add_annotation(start_index=0, length=128, metadata=meta) +def test_add_annotation_with_duplicate_key(): + f = SigMFFile() + f.add_capture(start_index=0) + m1 = {"latitude": 40.0, "longitude": -105.0} + f.add_annotation(start_index=0, length=128, metadata=m1) + m2 = {"latitude": 50.0, "longitude": -115.0} + f.add_annotation(start_index=0, length=128, metadata=m2) + assert len(f.get_annotations(64)) == 2 + + def test_fromarchive(test_sigmffile): print("test_sigmffile is:\n", test_sigmffile) tf = tempfile.mkstemp()[1] td = tempfile.mkdtemp() - archive_path = test_sigmffile.archive(name=tf) + archive_path = test_sigmffile.archive(archive_name=tf) result = sigmffile.fromarchive(archive_path=archive_path, dir=td) - assert result._metadata == test_sigmffile._metadata == TEST_METADATA + assert len(result) == 1 + result = result[0] + assert result._metadata == test_sigmffile._metadata == TEST_METADATA_1 os.remove(tf) shutil.rmtree(td) @@ -163,7 +175,7 @@ def test_multichannel_seek(): def test_key_validity(): '''assure the keys in test metadata are valid''' - for top_key, top_val in TEST_METADATA.items(): + for top_key, top_val in TEST_METADATA_1.items(): if type(top_val) is dict: for core_key in top_val.keys(): assert core_key in vars(SigMFFile)[f'VALID_{top_key.upper()}_KEYS'] diff --git a/tests/test_validation.py b/tests/test_validation.py index 75cf048..4d4f3b5 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -26,18 +26,18 @@ from jsonschema.exceptions import ValidationError -from .testdata import TEST_FLOAT32_DATA, TEST_METADATA +from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1 def test_valid_data(): '''assure the supplied metadata is OK''' - invalid_metadata = dict(TEST_METADATA) - SigMFFile(TEST_METADATA).validate() + invalid_metadata = dict(TEST_METADATA_1) + SigMFFile(TEST_METADATA_1).validate() class FailingCases(unittest.TestCase): '''Cases where the validator should throw an exception.''' def setUp(self): - self.metadata = dict(TEST_METADATA) + self.metadata = dict(TEST_METADATA_1) def test_extra_top_level_key(self): '''no extra keys allowed on the top level''' @@ -83,7 +83,7 @@ def test_invalid_annotation_order(self): def test_invalid_hash(self): _, temp_path = tempfile.mkstemp() - TEST_FLOAT32_DATA.tofile(temp_path) + TEST_FLOAT32_DATA_1.tofile(temp_path) self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.HASH_KEY] = 'derp' with self.assertRaises(sigmf.error.SigMFFileError): SigMFFile(metadata=self.metadata, data_file=temp_path) diff --git a/tests/testdata.py b/tests/testdata.py index 0a0d5ed..a744ae1 100644 --- a/tests/testdata.py +++ b/tests/testdata.py @@ -25,9 +25,9 @@ from sigmf import __version__ from sigmf import SigMFFile -TEST_FLOAT32_DATA = np.arange(16, dtype=np.float32) +TEST_FLOAT32_DATA_1 = np.arange(16, dtype=np.float32) -TEST_METADATA = { +TEST_METADATA_1 = { SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], SigMFFile.GLOBAL_KEY: { @@ -38,6 +38,19 @@ } } +TEST_FLOAT32_DATA_2 = np.arange(16, 32, dtype=np.float32) + +TEST_METADATA_2 = { + SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.GLOBAL_KEY: { + SigMFFile.DATATYPE_KEY: 'rf32_le', + SigMFFile.HASH_KEY: 'a85018cf117a4704596c0f360dbc3fce2d0d561966d865b9b8a356634161bde6a528c5181837890a9f4d54243e2e8eaf7e19bd535e54e3e34aabf76793723d03', + SigMFFile.NUM_CHANNELS_KEY: 1, + SigMFFile.VERSION_KEY: __version__ + } +} + # Data0 is a test of a compliant two capture recording TEST_U8_DATA0 = list(range(256)) TEST_U8_META0 = { From 44462f189020b938a234e4d9d3743329e26e601e Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Mon, 1 May 2023 10:17:56 -0600 Subject: [PATCH 02/28] fix SigMFArchiveReader error --- sigmf/archivereader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index 09765fa..7192f88 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -28,6 +28,7 @@ class SigMFArchiveReader(): """ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None): self.name = name + tar_obj = None try: if self.name is not None: if not name.endswith(SIGMF_ARCHIVE_EXT): @@ -89,7 +90,7 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu if not data_found: raise SigMFFileError('No .sigmf-data file found in archive!') finally: - tar_obj.close() + if tar_obj: tar_obj.close() def __len__(self): return len(self.sigmffiles) From 832b7310f25556aa073365c09e3279c580459589 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Wed, 3 May 2023 11:51:00 -0600 Subject: [PATCH 03/28] support single or multiple sigmffiles in archive __init__() --- sigmf/archive.py | 12 +++++++++--- tests/test_archivereader.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index bd206bd..70c9685 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -10,7 +10,9 @@ import shutil import tarfile import tempfile -from typing import BinaryIO, Iterable +from typing import BinaryIO, Iterable, Union + +import sigmf from .error import SigMFFileError @@ -46,8 +48,12 @@ class SigMFArchive(): supposed to be at position 0. `fileobj` won't be closed. If `fileobj` is given, `name` has no effect. """ - def __init__(self, sigmffiles : Iterable["SigMFFile"], name : str = None, fileobj : BinaryIO =None): - self.sigmffiles = sigmffiles + def __init__(self, sigmffiles : Union["SigMFFile", Iterable["SigMFFile"]], name : str = None, fileobj : BinaryIO =None): + + if isinstance(sigmffiles[0], sigmf.sigmffile.SigMFFile): + self.sigmffiles = sigmffiles + else: + self.sigmffiles = [sigmffiles] self.name = name self.fileobj = fileobj diff --git a/tests/test_archivereader.py b/tests/test_archivereader.py index 03f940e..9325295 100644 --- a/tests/test_archivereader.py +++ b/tests/test_archivereader.py @@ -56,7 +56,7 @@ def test_access_data_without_untar(test_sigmffile): def test_extract_single_recording(test_sigmffile): with tempfile.NamedTemporaryFile() as tf: expected_sigmffile = test_sigmffile - arch = SigMFArchive([expected_sigmffile], name=tf.name) + arch = SigMFArchive(expected_sigmffile, name=tf.name) reader = SigMFArchiveReader(arch.path) assert len(reader) == 1 actual_sigmffile = reader[0] From 8d25adf298a0bf3668f22003ef75e1a0430ad0b2 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Wed, 3 May 2023 14:21:54 -0600 Subject: [PATCH 04/28] renamed archive "name" to "path", allow os.PathLike --- sigmf/archive.py | 36 ++++++++++++++++++------------------ tests/test_archive.py | 11 ++++++++++- tests/test_archivereader.py | 4 ++-- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index 70c9685..22c1cd3 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -35,27 +35,27 @@ class SigMFArchive(): sigmffile -- An iterable of SigMFFile objects with valid metadata and data_files - name -- path to archive file to create. If file exists, overwrite. - If `name` doesn't end in .sigmf, it will be appended. The + path -- path to archive file to create. If file exists, overwrite. + If `path` doesn't end in .sigmf, it will be appended. The `self.path` instance variable will be updated upon successful writing of the archive to point to the final archive path. fileobj -- If `fileobj` is specified, it is used as an alternative to - a file object opened in binary mode for `name`. If + a file object opened in binary mode for `path`. If `fileobj` is an open tarfile, it will be appended to. It is supposed to be at position 0. `fileobj` won't be closed. If - `fileobj` is given, `name` has no effect. + `fileobj` is given, `path` has no effect. """ - def __init__(self, sigmffiles : Union["SigMFFile", Iterable["SigMFFile"]], name : str = None, fileobj : BinaryIO =None): + def __init__(self, sigmffiles : Union["SigMFFile", Iterable["SigMFFile"]], path : Union[str, os.PathLike] = None, fileobj : BinaryIO =None): if isinstance(sigmffiles[0], sigmf.sigmffile.SigMFFile): self.sigmffiles = sigmffiles else: self.sigmffiles = [sigmffiles] - self.name = name + self.path = str(path) self.fileobj = fileobj self._check_input() @@ -101,25 +101,25 @@ def chmod(tarinfo): self.path = sigmf_archive.name def _check_input(self): - self._ensure_name_has_correct_extension() + self._ensure_path_has_correct_extension() for sigmffile in self.sigmffiles: self._ensure_sigmffile_name_set(sigmffile) self._ensure_data_file_set(sigmffile) self._validate_sigmffile_metadata(sigmffile) - def _ensure_name_has_correct_extension(self): - name = self.name - if name is None: + def _ensure_path_has_correct_extension(self): + path = self.path + if path is None: return - has_extension = "." in name - has_correct_extension = name.endswith(SIGMF_ARCHIVE_EXT) + has_extension = "." in path + has_correct_extension = path.endswith(SIGMF_ARCHIVE_EXT) if has_extension and not has_correct_extension: - apparent_ext = os.path.splitext(name)[-1] + apparent_ext = os.path.splitext(path)[-1] err = "extension {} != {}".format(apparent_ext, SIGMF_ARCHIVE_EXT) raise SigMFFileError(err) - self.name = name if has_correct_extension else name + SIGMF_ARCHIVE_EXT + self.path = path if has_correct_extension else path + SIGMF_ARCHIVE_EXT @staticmethod def _ensure_sigmffile_name_set(sigmffile): @@ -138,10 +138,10 @@ def _validate_sigmffile_metadata(sigmffile): sigmffile.validate() def _get_archive_name(self): - if self.fileobj and not self.name: + if self.fileobj and not self.path: pathname = self.fileobj.name else: - pathname = self.name + pathname = self.path filename = os.path.split(pathname)[-1] archive_name, archive_ext = os.path.splitext(filename) @@ -154,7 +154,7 @@ def _get_output_fileobj(self): if self.fileobj: err = "fileobj {!r} is not byte-writable".format(self.fileobj) else: - err = "can't open {!r} for writing".format(self.name) + err = "can't open {!r} for writing".format(self.path) raise SigMFFileError(err) @@ -165,6 +165,6 @@ def _get_open_fileobj(self): fileobj = self.fileobj fileobj.write(bytes()) # force exception if not byte-writable else: - fileobj = open(self.name, "wb") + fileobj = open(self.path, "wb") return fileobj diff --git a/tests/test_archive.py b/tests/test_archive.py index 7b08e9a..da4b5ac 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -1,5 +1,6 @@ import codecs import json +from pathlib import Path import tarfile import tempfile from os import path @@ -116,7 +117,7 @@ def test_sf_fromarchive_multirec(test_sigmffile, test_alternate_sigmffile): with tempfile.NamedTemporaryFile(delete=True) as tf: # Create a multi-recording archive input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] - arch = SigMFArchive(input_sigmffiles, name=tf.name) + arch = SigMFArchive(input_sigmffiles, path=tf.name) output_sigmf_files = sigmffile.fromarchive(archive_path=arch.path) assert len(output_sigmf_files) == 2 assert input_sigmffiles == output_sigmf_files @@ -169,3 +170,11 @@ def test_tarfile_type(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) assert sigmf_tarfile.format == tarfile.PAX_FORMAT + +def test_create_archive_pathlike(test_sigmffile, test_alternate_sigmffile): + with tempfile.NamedTemporaryFile() as t: + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, path=Path(t.name)) + output_sigmf_files = sigmffile.fromarchive(archive_path=arch.path) + assert len(output_sigmf_files) == 2 + assert input_sigmffiles == output_sigmf_files diff --git a/tests/test_archivereader.py b/tests/test_archivereader.py index 9325295..626f6ce 100644 --- a/tests/test_archivereader.py +++ b/tests/test_archivereader.py @@ -56,7 +56,7 @@ def test_access_data_without_untar(test_sigmffile): def test_extract_single_recording(test_sigmffile): with tempfile.NamedTemporaryFile() as tf: expected_sigmffile = test_sigmffile - arch = SigMFArchive(expected_sigmffile, name=tf.name) + arch = SigMFArchive(expected_sigmffile, path=tf.name) reader = SigMFArchiveReader(arch.path) assert len(reader) == 1 actual_sigmffile = reader[0] @@ -67,7 +67,7 @@ def test_extract_multi_recording(test_sigmffile, test_alternate_sigmffile): with tempfile.NamedTemporaryFile() as tf: # Create a multi-recording archive expected_sigmffiles = [test_sigmffile, test_alternate_sigmffile] - arch = SigMFArchive(expected_sigmffiles, name=tf.name) + arch = SigMFArchive(expected_sigmffiles, path=tf.name) reader = SigMFArchiveReader(arch.path) assert len(reader) == 2 for expected in expected_sigmffiles: From 4f5845340ca5d5d440a4a1919f4c84eb3659ef08 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Wed, 3 May 2023 14:35:32 -0600 Subject: [PATCH 05/28] Fixed bug in checking sigmffiles type --- sigmf/archive.py | 10 ++++++++-- sigmf/sigmffile.py | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index 22c1cd3..a728280 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -6,6 +6,7 @@ """Create and extract SigMF archives.""" +import collections import os import shutil import tarfile @@ -50,10 +51,15 @@ class SigMFArchive(): """ def __init__(self, sigmffiles : Union["SigMFFile", Iterable["SigMFFile"]], path : Union[str, os.PathLike] = None, fileobj : BinaryIO =None): - if isinstance(sigmffiles[0], sigmf.sigmffile.SigMFFile): + if isinstance(sigmffiles, sigmf.sigmffile.SigMFFile): + self.sigmffiles = [sigmffiles] + elif hasattr(collections, "Iterable") and isinstance(sigmffiles, collections.Iterable): + self.sigmffiles = sigmffiles + elif isinstance(sigmffiles, collections.abc.Iterable): # python 3.10 self.sigmffiles = sigmffiles else: - self.sigmffiles = [sigmffiles] + raise SigMFFileError("Unknown type for sigmffiles argument!") + self.path = str(path) self.fileobj = fileobj diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index a196ed1..86cb086 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -549,7 +549,7 @@ def archive(self, sigmffile_name=None, archive_name=None, fileobj=None): if archive_name is None: archive_name = self.name - archive = SigMFArchive([self], archive_name, fileobj) + archive = SigMFArchive(self, archive_name, fileobj) return archive.path def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): From 89242c81850bfa74b6248ea18399d82bde9433f3 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Wed, 3 May 2023 15:39:23 -0600 Subject: [PATCH 06/28] add test for missing name --- tests/test_sigmffile.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index 6ec595b..f772269 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -23,9 +23,11 @@ import tempfile import json import numpy as np +import pytest import unittest from sigmf import sigmffile, utils +from sigmf.error import SigMFFileError from sigmf.sigmffile import SigMFFile from .testdata import * @@ -254,3 +256,21 @@ def test_captures_checking(): assert (160,224) == sigmf4.get_capture_byte_boundarys(1) assert np.array_equal(np.array(range(64)), sigmf4.read_samples_in_capture(0,autoscale=False)[:,0]) assert np.array_equal(np.array(range(64,96)), sigmf4.read_samples_in_capture(1,autoscale=False)[:,1]) + + +def test_archive_no_name_raises_exception(): + "Exception should be raised when no name set in SigMFFile constructor or in archive() method for sigmffile_name parameter" + with tempfile.NamedTemporaryFile() as temp_file: + data = np.ones(128, dtype=np.float32) + data.tofile(temp_file.name) + sigmffile = SigMFFile( + data_file=temp_file.name, + global_info={ + SigMFFile.DATATYPE_KEY: 'rf32_le', + SigMFFile.NUM_CHANNELS_KEY: 1, + }, + ) + with pytest.raises(SigMFFileError): + sigmffile.archive() + with pytest.raises(SigMFFileError): + sigmffile.archive(archive_name="test") From 0c503abebfaa8c20669d6f10ff27ba52b7980020 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 5 May 2023 13:16:22 -0600 Subject: [PATCH 07/28] require name in SigMFFile constructor --- sigmf/archivereader.py | 8 +++++++- sigmf/sigmffile.py | 20 +++++++------------ tests/test_archive.py | 39 +++++++++++++++++++++++++++---------- tests/test_archivereader.py | 2 +- tests/test_sigmffile.py | 38 +++++++++++------------------------- tests/test_validation.py | 4 ++-- 6 files changed, 57 insertions(+), 54 deletions(-) diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index 7192f88..80d3a94 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -45,6 +45,7 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu json_contents = None data_offset_size = None + sigmffile_name = None self.sigmffiles = [] data_found = False @@ -64,6 +65,10 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu with tar_obj.extractfile(memb) as memb_fid: json_contents = memb_fid.read() + _, sigmffile_name = os.path.split(memb.name) + sigmffile_name, _ = os.path.splitext(sigmffile_name) + + elif memb.name.endswith(SIGMF_DATASET_EXT): data_offset_size = memb.offset_data, memb.size data_found = True @@ -74,7 +79,7 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.') if data_offset_size is not None and json_contents is not None: - sigmffile = SigMFFile(metadata=json_contents) + sigmffile = SigMFFile(sigmffile_name, metadata=json_contents) valid_md = sigmffile.validate() sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0], @@ -85,6 +90,7 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu self.sigmffiles.append(sigmffile) data_offset_size = None json_contents = None + sigmffile_name = None if not data_found: diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 86cb086..f25aee4 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -148,7 +148,7 @@ class SigMFFile(SigMFMetafile): ] VALID_KEYS = {GLOBAL_KEY: VALID_GLOBAL_KEYS, CAPTURE_KEY: VALID_CAPTURE_KEYS, ANNOTATION_KEY: VALID_ANNOTATION_KEYS} - def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True, name=None): + def __init__(self, name, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True): ''' API for SigMF I/O @@ -533,23 +533,17 @@ def validate(self): version = self.get_global_field(self.VERSION_KEY) validate.validate(self._metadata, self.get_schema()) - def archive(self, sigmffile_name=None, archive_name=None, fileobj=None): + def archive(self, file_path=None, fileobj=None): """Dump contents to SigMF archive format. - `sigmffile_name` determines the directory and filenames inside the archive. If - not specified, you must have set the instance variable `self.name` - - `arhive_name` is passed to SigMFArchive `name` and `fileobj` is passed to + `file_path` is passed to SigMFArchive `path` and `fileobj` is passed to SigMFArchive `fileobj`. """ - if sigmffile_name is not None: - self.name = sigmffile_name - - if archive_name is None: - archive_name = self.name + if file_path is None: + file_path = self.name - archive = SigMFArchive(self, archive_name, fileobj) + archive = SigMFArchive(self, file_path, fileobj) return archive.path def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): @@ -976,7 +970,7 @@ def fromfile(filename, skip_checksum=False): meta_fp.close() data_fn = get_dataset_filename_from_metadata(meta_fn, metadata) - return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum) + return SigMFFile(name=fns['base_fn'], metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum) def get_sigmf_filenames(filename): diff --git a/tests/test_archive.py b/tests/test_archive.py index da4b5ac..7a82df6 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -15,8 +15,8 @@ from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1 -def create_test_archive(test_sigmffile, tmpfile, sigmffile_name="test"): - sigmf_archive = test_sigmffile.archive(sigmffile_name=sigmffile_name, fileobj=tmpfile) +def create_test_archive(test_sigmffile, tmpfile): + sigmf_archive = test_sigmffile.archive(fileobj=tmpfile) sigmf_tarfile = tarfile.open(sigmf_archive, mode="r", format=tarfile.PAX_FORMAT) return sigmf_tarfile @@ -25,20 +25,20 @@ def test_without_data_file_throws_fileerror(test_sigmffile): test_sigmffile.data_file = None with tempfile.NamedTemporaryFile() as temp: with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(archive_name=temp.name) + test_sigmffile.archive(file_path=temp.name) def test_invalid_md_throws_validationerror(test_sigmffile): del test_sigmffile._metadata["global"]["core:datatype"] # required field with tempfile.NamedTemporaryFile() as temp: with pytest.raises(jsonschema.exceptions.ValidationError): - test_sigmffile.archive(archive_name=temp.name) + test_sigmffile.archive(file_path=temp.name) def test_name_wrong_extension_throws_fileerror(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(archive_name=temp.name + ".zip") + test_sigmffile.archive(file_path=temp.name + ".zip") def test_fileobj_extension_ignored(test_sigmffile): @@ -48,7 +48,7 @@ def test_fileobj_extension_ignored(test_sigmffile): def test_name_used_in_fileobj(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: - sigmf_archive = test_sigmffile.archive(archive_name="testarchive", fileobj=temp) + sigmf_archive = test_sigmffile.archive(file_path="testarchive", fileobj=temp) sigmf_tarfile = tarfile.open(sigmf_archive, mode="r") basedir, file1, file2 = sigmf_tarfile.getmembers() assert basedir.name == test_sigmffile.name @@ -79,7 +79,7 @@ def test_unwritable_name_throws_fileerror(test_sigmffile): # so use invalid filename unwritable_file = '/bad_name/' with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(archive_name=unwritable_file) + test_sigmffile.archive(file_path=unwritable_file) def test_tarfile_layout(test_sigmffile): @@ -124,12 +124,12 @@ def test_sf_fromarchive_multirec(test_sigmffile, test_alternate_sigmffile): -def test_multirec_archive_into_fileobj(test_sigmffile): +def test_multirec_archive_into_fileobj(test_sigmffile, test_alternate_sigmffile): with tempfile.NamedTemporaryFile() as t: # add first sigmffile to the fileobj t - create_test_archive(test_sigmffile, t, sigmffile_name="test1") + create_test_archive(test_sigmffile, t) # add a second one to the same fileobj - multirec_tar = create_test_archive(test_sigmffile, t, sigmffile_name="test2") + multirec_tar = create_test_archive(test_alternate_sigmffile, t) members = multirec_tar.getmembers() assert len(members) == 6 # 2 directories and 2 files per directory @@ -178,3 +178,22 @@ def test_create_archive_pathlike(test_sigmffile, test_alternate_sigmffile): output_sigmf_files = sigmffile.fromarchive(archive_path=arch.path) assert len(output_sigmf_files) == 2 assert input_sigmffiles == output_sigmf_files + + +def test_archive_names(test_sigmffile): + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: + a = SigMFArchive(test_sigmffile, t.name) + assert a.path == t.name + observed_sigmffile = sigmffile.fromarchive(t.name)[0] + assert observed_sigmffile.name == test_sigmffile.name + + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: + archive_path = test_sigmffile.archive(t.name) + assert archive_path == t.name + observed_sigmffile = sigmffile.fromarchive(t.name)[0] + assert observed_sigmffile.name == test_sigmffile.name + + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: + test_sigmffile.tofile(t.name, toarchive=True) + observed_sigmffile = sigmffile.fromarchive(t.name)[0] + assert observed_sigmffile.name == test_sigmffile.name diff --git a/tests/test_archivereader.py b/tests/test_archivereader.py index 626f6ce..ac9a9f4 100644 --- a/tests/test_archivereader.py +++ b/tests/test_archivereader.py @@ -47,7 +47,7 @@ def test_access_data_without_untar(test_sigmffile): test_sigmffile.data_file = None with tempfile.NamedTemporaryFile() as temp: b.tofile(temp.name) - meta = SigMFFile(data_file=temp.name, global_info=global_info) + meta = SigMFFile("test", data_file=temp.name, global_info=global_info) meta.add_capture(0, metadata=capture_info) meta.tofile(archive_filename, toarchive=True) diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index f772269..9d8a08c 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -38,7 +38,7 @@ def setUp(self): '''assure tests have a valid SigMF object to work with''' _, temp_path = tempfile.mkstemp() TEST_FLOAT32_DATA_1.tofile(temp_path) - self.sigmf_object = SigMFFile(TEST_METADATA_1, data_file=temp_path) + self.sigmf_object = SigMFFile("test", TEST_METADATA_1, data_file=temp_path) def test_iterator_basic(self): '''make sure default batch_size works''' @@ -66,28 +66,28 @@ def simulate_capture(sigmf_md, n, capture_len): def test_default_constructor(): - SigMFFile() + SigMFFile(name="test") def test_set_non_required_global_field(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") sigf.set_global_field('this_is:not_in_the_schema', None) def test_add_capture(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") sigf.add_capture(start_index=0, metadata={}) def test_add_annotation(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") sigf.add_capture(start_index=0) meta = {"latitude": 40.0, "longitude": -105.0} sigf.add_annotation(start_index=0, length=128, metadata=meta) def test_add_annotation_with_duplicate_key(): - f = SigMFFile() + f = SigMFFile(name="test") f.add_capture(start_index=0) m1 = {"latitude": 40.0, "longitude": -105.0} f.add_annotation(start_index=0, length=128, metadata=m1) @@ -100,7 +100,7 @@ def test_fromarchive(test_sigmffile): print("test_sigmffile is:\n", test_sigmffile) tf = tempfile.mkstemp()[1] td = tempfile.mkdtemp() - archive_path = test_sigmffile.archive(archive_name=tf) + archive_path = test_sigmffile.archive(file_path=tf) result = sigmffile.fromarchive(archive_path=archive_path, dir=td) assert len(result) == 1 result = result[0] @@ -110,7 +110,7 @@ def test_fromarchive(test_sigmffile): def test_add_multiple_captures_and_annotations(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") for idx in range(3): simulate_capture(sigf, idx, 1024) @@ -138,6 +138,7 @@ def test_multichannel_types(): # for real or complex check_count = raw_count * 1 # deepcopy temp_signal = SigMFFile( + name="test", data_file=temp_path, global_info={ SigMFFile.DATATYPE_KEY: f'{complex_prefix}{key}_le', @@ -163,6 +164,7 @@ def test_multichannel_seek(): # write some dummy data and read back np.arange(18, dtype=np.uint16).tofile(temp_path) temp_signal = SigMFFile( + name="test", data_file=temp_path, global_info={ SigMFFile.DATATYPE_KEY: 'cu16_le', @@ -192,7 +194,7 @@ def test_key_validity(): def test_ordered_metadata(): '''check to make sure the metadata is sorted as expected''' - sigf = SigMFFile() + sigf = SigMFFile(name="test") top_sort_order = ['global', 'captures', 'annotations'] for kdx, key in enumerate(sigf.ordered_metadata()): assert kdx == top_sort_order.index(key) @@ -256,21 +258,3 @@ def test_captures_checking(): assert (160,224) == sigmf4.get_capture_byte_boundarys(1) assert np.array_equal(np.array(range(64)), sigmf4.read_samples_in_capture(0,autoscale=False)[:,0]) assert np.array_equal(np.array(range(64,96)), sigmf4.read_samples_in_capture(1,autoscale=False)[:,1]) - - -def test_archive_no_name_raises_exception(): - "Exception should be raised when no name set in SigMFFile constructor or in archive() method for sigmffile_name parameter" - with tempfile.NamedTemporaryFile() as temp_file: - data = np.ones(128, dtype=np.float32) - data.tofile(temp_file.name) - sigmffile = SigMFFile( - data_file=temp_file.name, - global_info={ - SigMFFile.DATATYPE_KEY: 'rf32_le', - SigMFFile.NUM_CHANNELS_KEY: 1, - }, - ) - with pytest.raises(SigMFFileError): - sigmffile.archive() - with pytest.raises(SigMFFileError): - sigmffile.archive(archive_name="test") diff --git a/tests/test_validation.py b/tests/test_validation.py index 4d4f3b5..ba49fe8 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -32,7 +32,7 @@ def test_valid_data(): '''assure the supplied metadata is OK''' invalid_metadata = dict(TEST_METADATA_1) - SigMFFile(TEST_METADATA_1).validate() + SigMFFile("test", TEST_METADATA_1).validate() class FailingCases(unittest.TestCase): '''Cases where the validator should throw an exception.''' @@ -86,4 +86,4 @@ def test_invalid_hash(self): TEST_FLOAT32_DATA_1.tofile(temp_path) self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.HASH_KEY] = 'derp' with self.assertRaises(sigmf.error.SigMFFileError): - SigMFFile(metadata=self.metadata, data_file=temp_path) + SigMFFile(name="test", metadata=self.metadata, data_file=temp_path) From d234ddf64d01bf47b5f7245545417e70217c15db Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 5 May 2023 15:43:27 -0600 Subject: [PATCH 08/28] return single or list of SigMFFiles in fromarchive --- sigmf/sigmffile.py | 4 +++- tests/conftest.py | 18 +++++++++++++++++- tests/test_archive.py | 6 +++--- tests/test_sigmffile.py | 40 ++++++++++++++++++++++++++++++++++++---- tests/testdata.py | 13 +++++++++++++ 5 files changed, 72 insertions(+), 9 deletions(-) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index f25aee4..77f5175 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -923,7 +923,9 @@ def fromarchive(archive_path, dir=None): access SigMF archives without extracting them. """ from .archivereader import SigMFArchiveReader - return SigMFArchiveReader(archive_path).sigmffiles + sigmffiles = SigMFArchiveReader(archive_path).sigmffiles + if len(sigmffiles) == 1: return sigmffiles[0] + else: return sigmffiles def fromfile(filename, skip_checksum=False): diff --git a/tests/conftest.py b/tests/conftest.py index 295998b..7b50c58 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,7 +24,7 @@ from sigmf.sigmffile import SigMFFile -from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1, TEST_FLOAT32_DATA_2, TEST_METADATA_2 +from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1, TEST_FLOAT32_DATA_2, TEST_METADATA_2, TEST_FLOAT32_DATA_3, TEST_METADATA_3 @pytest.fixture @@ -39,6 +39,12 @@ def test_data_file_2(): TEST_FLOAT32_DATA_2.tofile(t.name) yield t +@pytest.yield_fixture +def test_data_file_3(): + with tempfile.NamedTemporaryFile() as t: + TEST_FLOAT32_DATA_3.tofile(t.name) + yield t + @pytest.fixture def test_sigmffile(test_data_file_1): @@ -59,3 +65,13 @@ def test_alternate_sigmffile(test_data_file_2): f.set_data_file(test_data_file_2.name) assert f._metadata == TEST_METADATA_2 return f + +@pytest.fixture +def test_alternate_sigmffile_2(test_data_file_3): + f = SigMFFile(name='test3') + f.set_global_field("core:datatype", "rf32_le") + f.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA_3)) + f.add_capture(start_index=0) + f.set_data_file(test_data_file_3.name) + assert f._metadata == TEST_METADATA_3 + return f diff --git a/tests/test_archive.py b/tests/test_archive.py index 7a82df6..4b3f845 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -184,16 +184,16 @@ def test_archive_names(test_sigmffile): with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: a = SigMFArchive(test_sigmffile, t.name) assert a.path == t.name - observed_sigmffile = sigmffile.fromarchive(t.name)[0] + observed_sigmffile = sigmffile.fromarchive(t.name) assert observed_sigmffile.name == test_sigmffile.name with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: archive_path = test_sigmffile.archive(t.name) assert archive_path == t.name - observed_sigmffile = sigmffile.fromarchive(t.name)[0] + observed_sigmffile = sigmffile.fromarchive(t.name) assert observed_sigmffile.name == test_sigmffile.name with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: test_sigmffile.tofile(t.name, toarchive=True) - observed_sigmffile = sigmffile.fromarchive(t.name)[0] + observed_sigmffile = sigmffile.fromarchive(t.name) assert observed_sigmffile.name == test_sigmffile.name diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index 9d8a08c..2454f14 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -28,7 +28,8 @@ from sigmf import sigmffile, utils from sigmf.error import SigMFFileError -from sigmf.sigmffile import SigMFFile +from sigmf.sigmffile import SigMFFile, fromarchive +from sigmf.archive import SigMFArchive from .testdata import * @@ -102,12 +103,43 @@ def test_fromarchive(test_sigmffile): td = tempfile.mkdtemp() archive_path = test_sigmffile.archive(file_path=tf) result = sigmffile.fromarchive(archive_path=archive_path, dir=td) - assert len(result) == 1 - result = result[0] - assert result._metadata == test_sigmffile._metadata == TEST_METADATA_1 + assert result == test_sigmffile os.remove(tf) shutil.rmtree(td) +def test_from_archive_multiple_recordings(test_sigmffile, test_alternate_sigmffile, test_alternate_sigmffile_2): + # single recording + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: + path = t_file.name + test_sigmffile.archive(fileobj=t_file) + single_sigmffile = fromarchive(path) + assert isinstance(single_sigmffile, SigMFFile) + assert single_sigmffile == test_sigmffile + + # 2 recordings + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: + path = t_file.name + SigMFArchive([test_sigmffile, test_alternate_sigmffile], fileobj=t_file) + sigmffile_one, sigmffile_two = fromarchive(path) + assert isinstance(sigmffile_one, SigMFFile) + assert sigmffile_one == test_sigmffile + assert isinstance(sigmffile_two, SigMFFile) + assert sigmffile_two == test_alternate_sigmffile + + + # 3 recordings + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: + path = t_file.name + SigMFArchive([test_sigmffile, test_alternate_sigmffile, test_alternate_sigmffile_2], fileobj=t_file) + list_of_sigmffiles = fromarchive(path) + assert len(list_of_sigmffiles) == 3 + assert isinstance(list_of_sigmffiles[0], SigMFFile) + assert list_of_sigmffiles[0] == test_sigmffile + assert isinstance(list_of_sigmffiles[1], SigMFFile) + assert list_of_sigmffiles[1] == test_alternate_sigmffile + assert isinstance(list_of_sigmffiles[2], SigMFFile) + assert list_of_sigmffiles[2] == test_alternate_sigmffile_2 + def test_add_multiple_captures_and_annotations(): sigf = SigMFFile(name="test") diff --git a/tests/testdata.py b/tests/testdata.py index a744ae1..db55c56 100644 --- a/tests/testdata.py +++ b/tests/testdata.py @@ -51,6 +51,19 @@ } } +TEST_FLOAT32_DATA_3 = np.arange(32, 48, dtype=np.float32) + +TEST_METADATA_3 = { + SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.GLOBAL_KEY: { + SigMFFile.DATATYPE_KEY: 'rf32_le', + SigMFFile.HASH_KEY: '089753bd48a1724c485e822eaf4d510491e4e54faa83cc3e7b3f18a9f651813190862aa97c922278454c66f20a741050762e008cbe4f96f3bd0dcdb7d720179d', + SigMFFile.NUM_CHANNELS_KEY: 1, + SigMFFile.VERSION_KEY: __version__ + } +} + # Data0 is a test of a compliant two capture recording TEST_U8_DATA0 = list(range(256)) TEST_U8_META0 = { From 348bed8184cfaa4ee8973387afc7b95be337d1a9 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Mon, 8 May 2023 12:37:39 -0600 Subject: [PATCH 09/28] fix some formatting, unused imports, docstrings, rename archivereader "name" to "path", rename duplicate test name --- sigmf/archive.py | 23 +++++++++++++-------- sigmf/archivereader.py | 41 +++++++++++++++++++------------------ sigmf/sigmffile.py | 23 +++++++++++++++------ tests/conftest.py | 15 +++++++++++--- tests/test_archive.py | 9 +++++--- tests/test_archivereader.py | 18 +++++++--------- tests/test_sigmffile.py | 20 +++++++++++------- tests/test_validation.py | 3 ++- 8 files changed, 92 insertions(+), 60 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index a728280..039ed74 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -34,7 +34,8 @@ class SigMFArchive(): Parameters: - sigmffile -- An iterable of SigMFFile objects with valid metadata and data_files + sigmffile -- An iterable of SigMFFile objects with valid metadata and + data_files path -- path to archive file to create. If file exists, overwrite. If `path` doesn't end in .sigmf, it will be appended. The @@ -49,18 +50,22 @@ class SigMFArchive(): supposed to be at position 0. `fileobj` won't be closed. If `fileobj` is given, `path` has no effect. """ - def __init__(self, sigmffiles : Union["SigMFFile", Iterable["SigMFFile"]], path : Union[str, os.PathLike] = None, fileobj : BinaryIO =None): + def __init__(self, + sigmffiles: Union["sigmf.sigmffile.SigMFFile", + Iterable["sigmf.sigmffile.SigMFFile"]], + path: Union[str, os.PathLike] = None, + fileobj: BinaryIO = None): if isinstance(sigmffiles, sigmf.sigmffile.SigMFFile): self.sigmffiles = [sigmffiles] - elif hasattr(collections, "Iterable") and isinstance(sigmffiles, collections.Iterable): + elif (hasattr(collections, "Iterable") and + isinstance(sigmffiles, collections.Iterable)): self.sigmffiles = sigmffiles - elif isinstance(sigmffiles, collections.abc.Iterable): # python 3.10 + elif isinstance(sigmffiles, collections.abc.Iterable): # python 3.10 self.sigmffiles = sigmffiles else: raise SigMFFileError("Unknown type for sigmffiles argument!") - - + self.path = str(path) self.fileobj = fileobj @@ -73,11 +78,11 @@ def __init__(self, sigmffiles : Union["SigMFFile", Iterable["SigMFFile"]], path fileobj=sigmf_fileobj, format=tarfile.PAX_FORMAT) except tarfile.ReadError: - # fileobj doesn't contain any archives yet, so reopen in 'w' mode + # fileobj doesn't contain any archives yet, so reopen in 'w' mode sigmf_archive = tarfile.TarFile(mode='w', fileobj=sigmf_fileobj, format=tarfile.PAX_FORMAT) - + def chmod(tarinfo): if tarinfo.isdir(): tarinfo.mode = 0o755 # dwrxw-rw-r @@ -132,7 +137,7 @@ def _ensure_sigmffile_name_set(sigmffile): if not sigmffile.name: err = "the `name` attribute must be set to pass to `SigMFArchive`" raise SigMFFileError(err) - + @staticmethod def _ensure_data_file_set(sigmffile): if not sigmffile.data_file: diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index 80d3a94..7a6bfcb 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -7,35 +7,32 @@ """Access SigMF archives without extracting them.""" import os -import shutil import tarfile -import tempfile -from . import __version__ #, schema, sigmf_hash, validate from .sigmffile import SigMFFile -from .archive import SigMFArchive, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SIGMF_ARCHIVE_EXT -from .utils import dict_merge +from .archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SIGMF_ARCHIVE_EXT from .error import SigMFFileError class SigMFArchiveReader(): - """Access data within SigMF archive `tar` in-place without extracting. + """Access data within SigMF archive `tar` in-place without extracting. This + class can be used to iterate through multiple SigMFFiles in the archive. Parameters: - name -- path to archive file to access. If file does not exist, - or if `name` doesn't end in .sigmf, SigMFFileError is raised. + path -- path to archive file to access. If file does not exist, + or if `path` doesn't end in .sigmf, SigMFFileError is raised. """ - def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None): - self.name = name + def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_buffer=None): + self.path = path tar_obj = None try: - if self.name is not None: - if not name.endswith(SIGMF_ARCHIVE_EXT): + if self.path is not None: + if not self.path.endswith(SIGMF_ARCHIVE_EXT): err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT) raise SigMFFileError(err) - tar_obj = tarfile.open(self.name) + tar_obj = tarfile.open(self.path) elif archive_buffer is not None: tar_obj = tarfile.open(fileobj=archive_buffer, mode='r:') @@ -67,7 +64,6 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu _, sigmffile_name = os.path.split(memb.name) sigmffile_name, _ = os.path.splitext(sigmffile_name) - elif memb.name.endswith(SIGMF_DATASET_EXT): data_offset_size = memb.offset_data, memb.size @@ -79,11 +75,16 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.') if data_offset_size is not None and json_contents is not None: - sigmffile = SigMFFile(sigmffile_name, metadata=json_contents) - valid_md = sigmffile.validate() + sigmffile = SigMFFile(sigmffile_name, + metadata=json_contents) + sigmffile.validate() - sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0], - size_bytes=data_offset_size[1], map_readonly=map_readonly) + sigmffile.set_data_file(self.path, + data_buffer=archive_buffer, + skip_checksum=skip_checksum, + offset=data_offset_size[0], + size_bytes=data_offset_size[1], + map_readonly=map_readonly) self.ndim = sigmffile.ndim self.shape = sigmffile.shape @@ -91,12 +92,12 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu data_offset_size = None json_contents = None sigmffile_name = None - if not data_found: raise SigMFFileError('No .sigmf-data file found in archive!') finally: - if tar_obj: tar_obj.close() + if tar_obj: + tar_obj.close() def __len__(self): return len(self.sigmffiles) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 77f5175..0df45a8 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -148,7 +148,13 @@ class SigMFFile(SigMFMetafile): ] VALID_KEYS = {GLOBAL_KEY: VALID_GLOBAL_KEYS, CAPTURE_KEY: VALID_CAPTURE_KEYS, ANNOTATION_KEY: VALID_ANNOTATION_KEYS} - def __init__(self, name, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True): + def __init__(self, + name, + metadata=None, + data_file=None, + global_info=None, + skip_checksum=False, + map_readonly=True): ''' API for SigMF I/O @@ -220,7 +226,7 @@ def __getitem__(self, sli): else: raise ValueError("unhandled ndim in SigMFFile.__getitem__(); this shouldn't happen") return a - + def __eq__(self, other): """Define equality between two `SigMFFile`s. @@ -542,7 +548,7 @@ def archive(self, file_path=None, fileobj=None): """ if file_path is None: file_path = self.name - + archive = SigMFArchive(self, file_path, fileobj) return archive.path @@ -924,8 +930,10 @@ def fromarchive(archive_path, dir=None): """ from .archivereader import SigMFArchiveReader sigmffiles = SigMFArchiveReader(archive_path).sigmffiles - if len(sigmffiles) == 1: return sigmffiles[0] - else: return sigmffiles + if len(sigmffiles) == 1: + return sigmffiles[0] + else: + return sigmffiles def fromfile(filename, skip_checksum=False): @@ -972,7 +980,10 @@ def fromfile(filename, skip_checksum=False): meta_fp.close() data_fn = get_dataset_filename_from_metadata(meta_fn, metadata) - return SigMFFile(name=fns['base_fn'], metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum) + return SigMFFile(name=fns['base_fn'], + metadata=metadata, + data_file=data_fn, + skip_checksum=skip_checksum) def get_sigmf_filenames(filename): diff --git a/tests/conftest.py b/tests/conftest.py index 7b50c58..60f0be4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,7 +24,12 @@ from sigmf.sigmffile import SigMFFile -from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1, TEST_FLOAT32_DATA_2, TEST_METADATA_2, TEST_FLOAT32_DATA_3, TEST_METADATA_3 +from .testdata import (TEST_FLOAT32_DATA_1, + TEST_METADATA_1, + TEST_FLOAT32_DATA_2, + TEST_METADATA_2, + TEST_FLOAT32_DATA_3, + TEST_METADATA_3) @pytest.fixture @@ -33,13 +38,15 @@ def test_data_file_1(): TEST_FLOAT32_DATA_1.tofile(temp.name) yield temp -@pytest.yield_fixture + +@pytest.fixture def test_data_file_2(): with tempfile.NamedTemporaryFile() as t: TEST_FLOAT32_DATA_2.tofile(t.name) yield t -@pytest.yield_fixture + +@pytest.fixture def test_data_file_3(): with tempfile.NamedTemporaryFile() as t: TEST_FLOAT32_DATA_3.tofile(t.name) @@ -56,6 +63,7 @@ def test_sigmffile(test_data_file_1): assert f._metadata == TEST_METADATA_1 return f + @pytest.fixture def test_alternate_sigmffile(test_data_file_2): f = SigMFFile(name='test2') @@ -66,6 +74,7 @@ def test_alternate_sigmffile(test_data_file_2): assert f._metadata == TEST_METADATA_2 return f + @pytest.fixture def test_alternate_sigmffile_2(test_data_file_3): f = SigMFFile(name='test3') diff --git a/tests/test_archive.py b/tests/test_archive.py index 4b3f845..d340cb2 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -48,7 +48,8 @@ def test_fileobj_extension_ignored(test_sigmffile): def test_name_used_in_fileobj(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: - sigmf_archive = test_sigmffile.archive(file_path="testarchive", fileobj=temp) + sigmf_archive = test_sigmffile.archive(file_path="testarchive", + fileobj=temp) sigmf_tarfile = tarfile.open(sigmf_archive, mode="r") basedir, file1, file2 = sigmf_tarfile.getmembers() assert basedir.name == test_sigmffile.name @@ -112,6 +113,7 @@ def test_tarfile_names_and_extensions(test_sigmffile): assert path.split(file2_name)[-1] == test_sigmffile.name assert file2_ext in file_extensions + def test_sf_fromarchive_multirec(test_sigmffile, test_alternate_sigmffile): """`SigMFFile.fromarchive` should return list of SigMFFiles.""" with tempfile.NamedTemporaryFile(delete=True) as tf: @@ -123,8 +125,8 @@ def test_sf_fromarchive_multirec(test_sigmffile, test_alternate_sigmffile): assert input_sigmffiles == output_sigmf_files - -def test_multirec_archive_into_fileobj(test_sigmffile, test_alternate_sigmffile): +def test_multirec_archive_into_fileobj(test_sigmffile, + test_alternate_sigmffile): with tempfile.NamedTemporaryFile() as t: # add first sigmffile to the fileobj t create_test_archive(test_sigmffile, t) @@ -171,6 +173,7 @@ def test_tarfile_type(test_sigmffile): sigmf_tarfile = create_test_archive(test_sigmffile, temp) assert sigmf_tarfile.format == tarfile.PAX_FORMAT + def test_create_archive_pathlike(test_sigmffile, test_alternate_sigmffile): with tempfile.NamedTemporaryFile() as t: input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] diff --git a/tests/test_archivereader.py b/tests/test_archivereader.py index ac9a9f4..e5a4ca5 100644 --- a/tests/test_archivereader.py +++ b/tests/test_archivereader.py @@ -1,15 +1,10 @@ -import codecs -import json -import tarfile import tempfile -from os import path import numpy as np -import pytest -from sigmf import error from sigmf import SigMFFile, SigMFArchiveReader -from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SigMFArchive +from sigmf.archive import SigMFArchive + def test_access_data_without_untar(test_sigmffile): global_info = { @@ -24,7 +19,7 @@ def test_access_data_without_untar(test_sigmffile): "core:datetime": "2021-06-18T23:17:51.163959Z", "core:sample_start": 0 } - + NUM_ROWS = 5 for dt in "ri16_le", "ci16_le", "rf32_le", "rf64_le", "cf32_le", "cf64_le": @@ -33,7 +28,7 @@ def test_access_data_without_untar(test_sigmffile): global_info["core:num_channels"] = num_chan base_filename = dt + '_' + str(num_chan) archive_filename = base_filename + '.sigmf' - + a = np.arange(NUM_ROWS * num_chan * (2 if 'c' in dt else 1)) if 'i16' in dt: b = a.astype(np.int16) @@ -43,7 +38,7 @@ def test_access_data_without_untar(test_sigmffile): b = a.astype(np.float64) else: raise ValueError('whoops') - + test_sigmffile.data_file = None with tempfile.NamedTemporaryFile() as temp: b.tofile(temp.name) @@ -53,6 +48,7 @@ def test_access_data_without_untar(test_sigmffile): archi = SigMFArchiveReader(archive_filename, skip_checksum=True) + def test_extract_single_recording(test_sigmffile): with tempfile.NamedTemporaryFile() as tf: expected_sigmffile = test_sigmffile @@ -71,4 +67,4 @@ def test_extract_multi_recording(test_sigmffile, test_alternate_sigmffile): reader = SigMFArchiveReader(arch.path) assert len(reader) == 2 for expected in expected_sigmffiles: - assert expected in reader.sigmffiles \ No newline at end of file + assert expected in reader.sigmffiles diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index 2454f14..da036d2 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -23,11 +23,9 @@ import tempfile import json import numpy as np -import pytest import unittest from sigmf import sigmffile, utils -from sigmf.error import SigMFFileError from sigmf.sigmffile import SigMFFile, fromarchive from sigmf.archive import SigMFArchive @@ -39,7 +37,9 @@ def setUp(self): '''assure tests have a valid SigMF object to work with''' _, temp_path = tempfile.mkstemp() TEST_FLOAT32_DATA_1.tofile(temp_path) - self.sigmf_object = SigMFFile("test", TEST_METADATA_1, data_file=temp_path) + self.sigmf_object = SigMFFile("test", + TEST_METADATA_1, + data_file=temp_path) def test_iterator_basic(self): '''make sure default batch_size works''' @@ -107,7 +107,10 @@ def test_fromarchive(test_sigmffile): os.remove(tf) shutil.rmtree(td) -def test_from_archive_multiple_recordings(test_sigmffile, test_alternate_sigmffile, test_alternate_sigmffile_2): + +def test_fromarchive_multi_recording(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): # single recording with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: path = t_file.name @@ -119,18 +122,21 @@ def test_from_archive_multiple_recordings(test_sigmffile, test_alternate_sigmffi # 2 recordings with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: path = t_file.name - SigMFArchive([test_sigmffile, test_alternate_sigmffile], fileobj=t_file) + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + SigMFArchive(input_sigmffiles, fileobj=t_file) sigmffile_one, sigmffile_two = fromarchive(path) assert isinstance(sigmffile_one, SigMFFile) assert sigmffile_one == test_sigmffile assert isinstance(sigmffile_two, SigMFFile) assert sigmffile_two == test_alternate_sigmffile - # 3 recordings with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: path = t_file.name - SigMFArchive([test_sigmffile, test_alternate_sigmffile, test_alternate_sigmffile_2], fileobj=t_file) + input_sigmffiles = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + SigMFArchive(input_sigmffiles, fileobj=t_file) list_of_sigmffiles = fromarchive(path) assert len(list_of_sigmffiles) == 3 assert isinstance(list_of_sigmffiles[0], SigMFFile) diff --git a/tests/test_validation.py b/tests/test_validation.py index ba49fe8..57a186c 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -34,6 +34,7 @@ def test_valid_data(): invalid_metadata = dict(TEST_METADATA_1) SigMFFile("test", TEST_METADATA_1).validate() + class FailingCases(unittest.TestCase): '''Cases where the validator should throw an exception.''' def setUp(self): @@ -45,7 +46,7 @@ def test_extra_top_level_key(self): with self.assertRaises(ValidationError): SigMFFile(self.metadata).validate() - def test_extra_top_level_key(self): + def test_invalid_label(self): '''label must be less than 20 chars''' self.metadata[SigMFFile.ANNOTATION_KEY][0][SigMFFile.LABEL_KEY] = 'a' * 21 with self.assertRaises(ValidationError): From b6df262c9da0f763d2c54eb212443663d4f91ada Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Thu, 11 May 2023 15:07:28 -0600 Subject: [PATCH 10/28] add support for collections in archives, check for path and fileobj in archive, add __eq__ method for SigMFCollection, improved handling of metadata in collection, fixed bug in read_samples() --- sigmf/archive.py | 47 ++++++++++- sigmf/archivereader.py | 23 ++++- sigmf/sigmffile.py | 51 +++++++++-- tests/test_archive.py | 164 +++++++++++++++++++++++++++++++++++- tests/test_archivereader.py | 43 +++++++++- tests/test_sigmffile.py | 81 +++++++++++++++++- 6 files changed, 391 insertions(+), 18 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index 039ed74..75a503a 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -16,7 +16,7 @@ import sigmf -from .error import SigMFFileError +from .error import SigMFFileError, SigMFValidationError SIGMF_ARCHIVE_EXT = ".sigmf" @@ -53,9 +53,14 @@ class SigMFArchive(): def __init__(self, sigmffiles: Union["sigmf.sigmffile.SigMFFile", Iterable["sigmf.sigmffile.SigMFFile"]], + collectionfile: "sigmf.sigmffile.SigMFCollection" = None, path: Union[str, os.PathLike] = None, fileobj: BinaryIO = None): + if (not path) and (not fileobj): + raise SigMFFileError("'path' or 'fileobj' required for creating " + "SigMF archive!") + if isinstance(sigmffiles, sigmf.sigmffile.SigMFFile): self.sigmffiles = [sigmffiles] elif (hasattr(collections, "Iterable") and @@ -66,11 +71,16 @@ def __init__(self, else: raise SigMFFileError("Unknown type for sigmffiles argument!") - self.path = str(path) + if path: + self.path = str(path) + else: + self.path = None self.fileobj = fileobj + self.collectionfile = collectionfile self._check_input() + archive_name = self._get_archive_name() mode = "a" if fileobj is not None else "w" sigmf_fileobj = self._get_output_fileobj() try: @@ -90,6 +100,15 @@ def chmod(tarinfo): tarinfo.mode = 0o644 # -wr-r--r-- return tarinfo + if collectionfile: + with tempfile.NamedTemporaryFile(mode="w") as tmpfile: + collectionfile.dump(tmpfile, pretty=True) + tmpfile.flush() + collection_filename = archive_name + SIGMF_COLLECTION_EXT + sigmf_archive.add(tmpfile.name, + arcname=collection_filename, + filter=chmod) + for sigmffile in self.sigmffiles: with tempfile.TemporaryDirectory() as tmpdir: sigmf_md_filename = sigmffile.name + SIGMF_METADATA_EXT @@ -117,6 +136,9 @@ def _check_input(self): self._ensure_sigmffile_name_set(sigmffile) self._ensure_data_file_set(sigmffile) self._validate_sigmffile_metadata(sigmffile) + if self.collectionfile: + self._validate_sigmffile_collection(self.collectionfile, + self.sigmffiles) def _ensure_path_has_correct_extension(self): path = self.path @@ -148,6 +170,27 @@ def _ensure_data_file_set(sigmffile): def _validate_sigmffile_metadata(sigmffile): sigmffile.validate() + @staticmethod + def _validate_sigmffile_collection(collectionfile, sigmffiles): + if len(collectionfile) != len(sigmffiles): + raise SigMFValidationError("Mismatched number of recordings " + "between sigmffiles and collection " + "file!") + streams_key = collectionfile.STREAMS_KEY + streams = collectionfile.get_collection_field(streams_key) + sigmf_meta_hashes = [s["hash"] for s in streams] + if not streams: + raise SigMFValidationError("No recordings in collection file!") + for sigmffile in sigmffiles: + with tempfile.NamedTemporaryFile(mode="w") as tmpfile: + sigmffile.dump(tmpfile, pretty=True) + tmpfile.flush() + meta_path = tmpfile.name + sigmf_meta_hash = sigmf.sigmf_hash.calculate_sha512(meta_path) + if sigmf_meta_hash not in sigmf_meta_hashes: + raise SigMFValidationError("SigMFFile given that " + "is not in collection file!") + def _get_archive_name(self): if self.fileobj and not self.path: pathname = self.fileobj.name diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index 7a6bfcb..7805790 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -9,8 +9,11 @@ import os import tarfile -from .sigmffile import SigMFFile -from .archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SIGMF_ARCHIVE_EXT +from .sigmffile import SigMFCollection, SigMFFile +from .archive import (SIGMF_COLLECTION_EXT, + SIGMF_DATASET_EXT, + SIGMF_METADATA_EXT, + SIGMF_ARCHIVE_EXT) from .error import SigMFFileError @@ -45,6 +48,7 @@ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_bu sigmffile_name = None self.sigmffiles = [] data_found = False + collection_metadata = {} for memb in tar_obj.getmembers(): if memb.isdir(): # memb.type == tarfile.DIRTYPE: @@ -64,11 +68,12 @@ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_bu _, sigmffile_name = os.path.split(memb.name) sigmffile_name, _ = os.path.splitext(sigmffile_name) - elif memb.name.endswith(SIGMF_DATASET_EXT): data_offset_size = memb.offset_data, memb.size data_found = True - + elif memb.name.endswith(SIGMF_COLLECTION_EXT): + with tar_obj.extractfile(memb) as collection_f: + collection_metadata = collection_f.read() else: print('A regular file', memb.name, 'was found but ignored in the archive') else: @@ -92,6 +97,16 @@ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_bu data_offset_size = None json_contents = None sigmffile_name = None + if collection_metadata: + # Currently the SigMFCollection class does not support getting + # SigMFFiles (SigMFCollection.get_SigMFFile()) when created + # here in SigMFArchiveReader. This is because the SigMF + # metadata files are not extracted from the tarfile to the + # file system. + self.collection = SigMFCollection(metadata=collection_metadata, + skip_checksums=True) + else: + self.collection = None if not data_found: raise SigMFFileError('No .sigmf-data file found in archive!') diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 0df45a8..8ffdaf6 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -549,7 +549,7 @@ def archive(self, file_path=None, fileobj=None): if file_path is None: file_path = self.name - archive = SigMFArchive(self, file_path, fileobj) + archive = SigMFArchive(self, path=file_path, fileobj=fileobj) return archive.path def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): @@ -634,7 +634,10 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=F if not self._is_conforming_dataset(): warnings.warn(f'Recording dataset appears non-compliant, resulting data may be erroneous') - return self._read_datafile(first_byte, count * self.get_num_channels(), autoscale, False) + return self._read_datafile(first_byte, + count * self.get_num_channels(), + autoscale, + raw_components) def _read_datafile(self, first_byte, nitems, autoscale, raw_components): ''' @@ -709,8 +712,10 @@ def __init__(self, metafiles=None, metadata=None, skip_checksums=False): self._metadata = {self.COLLECTION_KEY:{}} self._metadata[self.COLLECTION_KEY][self.VERSION_KEY] = __version__ self._metadata[self.COLLECTION_KEY][self.STREAMS_KEY] = [] - else: + elif isinstance(metadata, dict): self._metadata = metadata + else: + self._metadata = json.loads(metadata) if metafiles is None: self.metafiles = [] @@ -726,6 +731,15 @@ def __len__(self): ''' return len(self.get_stream_names()) + def __eq__(self, other): + """Define equality between two `SigMFCollections's by comparing + metadata. + """ + if isinstance(other, SigMFCollection): + return self._metadata == other._metadata + + return False + def verify_stream_hashes(self): ''' compares the stream hashes in the collection metadata to the metadata files @@ -789,9 +803,24 @@ def get_collection_field(self, key, default=None): """ return self._metadata[self.COLLECTION_KEY].get(key, default) - def tofile(self, file_path, pretty=True): + def archive(self, file_path=None, fileobj=None): + """Dump contents to SigMF archive format. + + `file_path` is passed to SigMFArchive `path` and `fileobj` is passed to + SigMFArchive `fileobj`. + + """ + + sigmffiles = [] + for name in self.get_stream_names(): + sigmffile = self.get_SigMFFile(name) + sigmffiles.append(sigmffile) + archive = SigMFArchive(sigmffiles, self, file_path, fileobj) + return archive.path + + def tofile(self, file_path, pretty=True, toarchive=False): ''' - Write metadata file + Write metadata file or create archive. Parameters ---------- @@ -799,11 +828,17 @@ def tofile(self, file_path, pretty=True): Location to save. pretty : bool, default True When True will write more human-readable output, otherwise will be flat JSON. + toarchive : bool, default False + If True, create an archive from the collection file and recordings + instead of creating collection metadata file. ''' fns = get_sigmf_filenames(file_path) - with open(fns['collection_fn'], 'w') as fp: - self.dump(fp, pretty=pretty) - fp.write('\n') # text files should end in carriage return + if toarchive: + self.archive(fns['archive_fn']) + else: + with open(fns['collection_fn'], 'w') as fp: + self.dump(fp, pretty=pretty) + fp.write('\n') # text files should end in carriage return def get_SigMFFile(self, stream_name=None, stream_index=None): ''' diff --git a/tests/test_archive.py b/tests/test_archive.py index d340cb2..63bade7 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -1,5 +1,6 @@ import codecs import json +import os from pathlib import Path import tarfile import tempfile @@ -10,7 +11,10 @@ import jsonschema from sigmf import error, sigmffile -from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SigMFArchive +from sigmf.archive import (SIGMF_COLLECTION_EXT, + SIGMF_DATASET_EXT, + SIGMF_METADATA_EXT, + SigMFArchive) from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1 @@ -185,7 +189,7 @@ def test_create_archive_pathlike(test_sigmffile, test_alternate_sigmffile): def test_archive_names(test_sigmffile): with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: - a = SigMFArchive(test_sigmffile, t.name) + a = SigMFArchive(sigmffiles=test_sigmffile, path=t.name) assert a.path == t.name observed_sigmffile = sigmffile.fromarchive(t.name) assert observed_sigmffile.name == test_sigmffile.name @@ -200,3 +204,159 @@ def test_archive_names(test_sigmffile): test_sigmffile.tofile(t.name, toarchive=True) observed_sigmffile = sigmffile.fromarchive(t.name) assert observed_sigmffile.name == test_sigmffile.name + + +def test_single_recording_with_collection(test_sigmffile): + sigmf_meta_file = test_sigmffile.name + SIGMF_METADATA_EXT + try: + with open(sigmf_meta_file, mode="w") as sigmf_meta_fd: + test_sigmffile.dump(sigmf_meta_fd) + test_collection = sigmffile.SigMFCollection([sigmf_meta_file]) + input_collection_json = test_collection.dumps(pretty=True) + with tempfile.NamedTemporaryFile(suffix=".sigmf") as tmpfile: + archive = SigMFArchive(test_sigmffile, + test_collection, + fileobj=tmpfile) + with tarfile.open(archive.path) as tar: + # 1 collection_file + 1 dir + 1 meta file + 1 data file + assert len(tar.getmembers()) == 4 + for member in tar.getmembers(): + if member.isfile(): + if member.name.endswith(SIGMF_COLLECTION_EXT): + collection_file = tar.extractfile(member) + output_collection_json = json.load(collection_file) + assert (json.loads(input_collection_json) == + output_collection_json) + finally: + if os.path.exists(sigmf_meta_file): + os.remove(sigmf_meta_file) + + +def test_multiple_recordings_with_collection(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): + sigmf_meta_files = [ + test_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile_2.name + SIGMF_METADATA_EXT + ] + input_sigmf_files = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + try: + for sigmf_meta_file, sigmf_file in zip(sigmf_meta_files, + input_sigmf_files): + with open(sigmf_meta_file, mode="w") as sigmf_meta_fd: + sigmf_file.dump(sigmf_meta_fd) + test_collection = sigmffile.SigMFCollection(sigmf_meta_files) + input_collection_json = test_collection.dumps(pretty=True) + with tempfile.NamedTemporaryFile(suffix=".sigmf") as tmpfile: + archive = SigMFArchive(input_sigmf_files, + test_collection, + fileobj=tmpfile) + with tarfile.open(archive.path) as tar: + # 1 collection_file + 3 dir + 3 meta file + 3 data file + assert len(tar.getmembers()) == 10 + for member in tar.getmembers(): + if member.isfile(): + if member.name.endswith(SIGMF_COLLECTION_EXT): + collection_file = tar.extractfile(member) + output_collection_json = json.load(collection_file) + assert (json.loads(input_collection_json) == + output_collection_json) + finally: + for sigmf_meta_file in sigmf_meta_files: + if os.path.exists(sigmf_meta_file): + os.remove(sigmf_meta_file) + + +def test_extra_sigmf_file_not_in_collection(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): + sigmf_meta_files = [ + test_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile_2.name + SIGMF_METADATA_EXT + ] + input_sigmf_files = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + try: + for sigmf_meta_file, sigmf_file in zip(sigmf_meta_files, + input_sigmf_files): + with open(sigmf_meta_file, mode="w") as sigmf_meta_fd: + sigmf_file.dump(sigmf_meta_fd) + test_collection = sigmffile.SigMFCollection(sigmf_meta_files[:2]) + with tempfile.NamedTemporaryFile(suffix=".sigmf") as tmpfile: + with pytest.raises(error.SigMFValidationError): + SigMFArchive(input_sigmf_files, + test_collection, + fileobj=tmpfile) + finally: + for sigmf_meta_file in sigmf_meta_files: + if os.path.exists(sigmf_meta_file): + os.remove(sigmf_meta_file) + + +def test_extra_recording_not_in_sigmffiles(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): + sigmf_meta_files = [ + test_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile_2.name + SIGMF_METADATA_EXT + ] + input_sigmf_files = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + try: + for sigmf_meta_file, sigmf_file in zip(sigmf_meta_files, + input_sigmf_files): + with open(sigmf_meta_file, mode="w") as sigmf_meta_fd: + sigmf_file.dump(sigmf_meta_fd) + test_collection = sigmffile.SigMFCollection(sigmf_meta_files) + + with tempfile.NamedTemporaryFile(suffix=".sigmf") as tmpfile: + with pytest.raises(error.SigMFValidationError): + SigMFArchive(input_sigmf_files[:2], + test_collection, + fileobj=tmpfile) + finally: + for sigmf_meta_file in sigmf_meta_files: + if os.path.exists(sigmf_meta_file): + os.remove(sigmf_meta_file) + + +def test_mismatched_sigmffiles_collection(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): + sigmf_meta_files = [ + test_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile_2.name + SIGMF_METADATA_EXT + ] + input_sigmf_files = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + try: + for sigmf_meta_file, sigmf_file in zip(sigmf_meta_files, + input_sigmf_files): + with open(sigmf_meta_file, mode="w") as sigmf_meta_fd: + sigmf_file.dump(sigmf_meta_fd) + test_collection = sigmffile.SigMFCollection(sigmf_meta_files[:2]) + + with tempfile.NamedTemporaryFile(suffix=".sigmf") as tmpfile: + with pytest.raises(error.SigMFValidationError): + SigMFArchive(input_sigmf_files[1:3], + test_collection, + fileobj=tmpfile) + finally: + for sigmf_meta_file in sigmf_meta_files: + if os.path.exists(sigmf_meta_file): + os.remove(sigmf_meta_file) + + +def test_archive_no_path_or_fileobj(test_sigmffile): + """Error should be raised when no path or fileobj given.""" + with pytest.raises(error.SigMFFileError): + SigMFArchive(test_sigmffile) diff --git a/tests/test_archivereader.py b/tests/test_archivereader.py index e5a4ca5..d3139ea 100644 --- a/tests/test_archivereader.py +++ b/tests/test_archivereader.py @@ -1,9 +1,11 @@ +import os import tempfile import numpy as np from sigmf import SigMFFile, SigMFArchiveReader -from sigmf.archive import SigMFArchive +from sigmf.archive import SIGMF_METADATA_EXT, SigMFArchive +from sigmf.sigmffile import SigMFCollection def test_access_data_without_untar(test_sigmffile): @@ -68,3 +70,42 @@ def test_extract_multi_recording(test_sigmffile, test_alternate_sigmffile): assert len(reader) == 2 for expected in expected_sigmffiles: assert expected in reader.sigmffiles + + +def test_extract_single_recording_with_collection(test_sigmffile): + with tempfile.TemporaryDirectory() as tmpdir: + meta_filepath = os.path.join(tmpdir, + test_sigmffile.name + SIGMF_METADATA_EXT) + with open(meta_filepath, "w") as meta_fd: + test_sigmffile.dump(meta_fd) + collection = SigMFCollection(metafiles=[meta_filepath]) + archive_path = os.path.join(tmpdir, "test_archive.sigmf") + arch = SigMFArchive(test_sigmffile, collection, path=archive_path) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 1 + actual_sigmffile = reader[0] + assert test_sigmffile == actual_sigmffile + assert collection == reader.collection + + +def test_extract_multi_recording_with_collection(test_sigmffile, + test_alternate_sigmffile): + with tempfile.TemporaryDirectory() as tmpdir: + meta1_filepath = test_sigmffile.name + SIGMF_METADATA_EXT + meta1_filepath = os.path.join(tmpdir, meta1_filepath) + with open(meta1_filepath, "w") as meta_fd: + test_sigmffile.dump(meta_fd) + meta2_filepath = test_alternate_sigmffile.name + SIGMF_METADATA_EXT + meta2_filepath = os.path.join(tmpdir, meta2_filepath) + with open(meta2_filepath, "w") as meta_fd: + test_alternate_sigmffile.dump(meta_fd) + collection = SigMFCollection(metafiles=[meta1_filepath, + meta2_filepath]) + archive_path = os.path.join(tmpdir, "test_archive.sigmf") + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, collection, path=archive_path) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 2 # number of SigMFFiles + for actual_sigmffile in reader: + assert actual_sigmffile in input_sigmffiles + assert collection == reader.collection diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index da036d2..3868198 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -26,8 +26,9 @@ import unittest from sigmf import sigmffile, utils +from sigmf.archivereader import SigMFArchiveReader from sigmf.sigmffile import SigMFFile, fromarchive -from sigmf.archive import SigMFArchive +from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SigMFArchive from .testdata import * @@ -296,3 +297,81 @@ def test_captures_checking(): assert (160,224) == sigmf4.get_capture_byte_boundarys(1) assert np.array_equal(np.array(range(64)), sigmf4.read_samples_in_capture(0,autoscale=False)[:,0]) assert np.array_equal(np.array(range(64,96)), sigmf4.read_samples_in_capture(1,autoscale=False)[:,1]) + + +def test_archive_collection(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): + sigmf_meta_files = [ + test_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile_2.name + SIGMF_METADATA_EXT + ] + input_sigmf_files = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + data = [TEST_FLOAT32_DATA_1, TEST_FLOAT32_DATA_2, TEST_FLOAT32_DATA_3] + try: + for sigmf_meta_file, sigmf_file, _data in zip(sigmf_meta_files, + input_sigmf_files, + data): + with open(sigmf_meta_file, mode="w") as sigmf_meta_fd: + sigmf_file.dump(sigmf_meta_fd) + sample_data = sigmf_file.read_samples(autoscale=False, + raw_components=True) + assert np.array_equal(sample_data, _data) + sample_data.tofile(sigmf_file.name + SIGMF_DATASET_EXT) + test_collection = sigmffile.SigMFCollection(sigmf_meta_files) + with tempfile.NamedTemporaryFile(suffix=".sigmf") as tmpfile: + archive_path = test_collection.archive(fileobj=tmpfile) + archive_reader = SigMFArchiveReader(path=archive_path) + for input_sigmf_file in input_sigmf_files: + assert input_sigmf_file in archive_reader.sigmffiles + assert test_collection == archive_reader.collection + finally: + for sigmf_meta_file in sigmf_meta_files: + if os.path.exists(sigmf_meta_file): + os.remove(sigmf_meta_file) + for sigmf_file in input_sigmf_files: + filename = sigmf_file.name + SIGMF_DATASET_EXT + if os.path.exists(filename): + os.remove(filename) + + +def test_tofile_collection(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): + sigmf_meta_files = [ + test_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile.name + SIGMF_METADATA_EXT, + test_alternate_sigmffile_2.name + SIGMF_METADATA_EXT + ] + input_sigmf_files = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + data = [TEST_FLOAT32_DATA_1, TEST_FLOAT32_DATA_2, TEST_FLOAT32_DATA_3] + try: + for sigmf_meta_file, sigmf_file, _data in zip(sigmf_meta_files, + input_sigmf_files, + data): + with open(sigmf_meta_file, mode="w") as sigmf_meta_fd: + sigmf_file.dump(sigmf_meta_fd) + sample_data = sigmf_file.read_samples(autoscale=False, + raw_components=True) + assert np.array_equal(sample_data, _data) + sample_data.tofile(sigmf_file.name + SIGMF_DATASET_EXT) + test_collection = sigmffile.SigMFCollection(sigmf_meta_files) + with tempfile.NamedTemporaryFile(suffix=".sigmf") as tmpfile: + test_collection.tofile(tmpfile.name, toarchive=True) + archive_reader = SigMFArchiveReader(path=tmpfile.name) + for input_sigmf_file in input_sigmf_files: + assert input_sigmf_file in archive_reader.sigmffiles + assert test_collection == archive_reader.collection + finally: + for sigmf_meta_file in sigmf_meta_files: + if os.path.exists(sigmf_meta_file): + os.remove(sigmf_meta_file) + for sigmf_file in input_sigmf_files: + filename = sigmf_file.name + SIGMF_DATASET_EXT + if os.path.exists(filename): + os.remove(filename) From 4cfc8c27e884b6aa3f90d3ca1abc45c919a3b00a Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 12 May 2023 14:24:22 -0600 Subject: [PATCH 11/28] rename collectionfile to collection --- sigmf/archive.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index 75a503a..fb1e468 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -53,7 +53,7 @@ class SigMFArchive(): def __init__(self, sigmffiles: Union["sigmf.sigmffile.SigMFFile", Iterable["sigmf.sigmffile.SigMFFile"]], - collectionfile: "sigmf.sigmffile.SigMFCollection" = None, + collection: "sigmf.sigmffile.SigMFCollection" = None, path: Union[str, os.PathLike] = None, fileobj: BinaryIO = None): @@ -76,7 +76,7 @@ def __init__(self, else: self.path = None self.fileobj = fileobj - self.collectionfile = collectionfile + self.collection = collection self._check_input() @@ -100,9 +100,9 @@ def chmod(tarinfo): tarinfo.mode = 0o644 # -wr-r--r-- return tarinfo - if collectionfile: + if collection: with tempfile.NamedTemporaryFile(mode="w") as tmpfile: - collectionfile.dump(tmpfile, pretty=True) + collection.dump(tmpfile, pretty=True) tmpfile.flush() collection_filename = archive_name + SIGMF_COLLECTION_EXT sigmf_archive.add(tmpfile.name, @@ -136,8 +136,8 @@ def _check_input(self): self._ensure_sigmffile_name_set(sigmffile) self._ensure_data_file_set(sigmffile) self._validate_sigmffile_metadata(sigmffile) - if self.collectionfile: - self._validate_sigmffile_collection(self.collectionfile, + if self.collection: + self._validate_sigmffile_collection(self.collection, self.sigmffiles) def _ensure_path_has_correct_extension(self): From ea4e63363dea179ea471b4bf3b9bb6b4441e1eca Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 12 May 2023 14:28:03 -0600 Subject: [PATCH 12/28] make json end of file new line consistent, add support for collection to fromarchive() --- sigmf/sigmffile.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 8ffdaf6..925d3d7 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -78,6 +78,7 @@ def dump(self, filep, pretty=True): indent=4 if pretty else None, separators=(',', ': ') if pretty else None, ) + filep.write("\n") def dumps(self, pretty=True): ''' @@ -574,7 +575,6 @@ def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): else: with open(fns['meta_fn'], 'w') as fp: self.dump(fp, pretty=pretty) - fp.write('\n') # text files should end in carriage return def read_samples_in_capture(self, index=0, autoscale=True): ''' @@ -838,7 +838,6 @@ def tofile(self, file_path, pretty=True, toarchive=False): else: with open(fns['collection_fn'], 'w') as fp: self.dump(fp, pretty=pretty) - fp.write('\n') # text files should end in carriage return def get_SigMFFile(self, stream_name=None, stream_index=None): ''' @@ -958,17 +957,23 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None): def fromarchive(archive_path, dir=None): - """Extract an archive and return containing SigMFFiles. + """Extract an archive and return containing SigMFFiles and SigMFCollection. The `dir` parameter is no longer used as this function has been changed to access SigMF archives without extracting them. """ from .archivereader import SigMFArchiveReader - sigmffiles = SigMFArchiveReader(archive_path).sigmffiles + reader = SigMFArchiveReader(archive_path) + sigmffiles = reader.sigmffiles + sigmffile_ret = None if len(sigmffiles) == 1: - return sigmffiles[0] + sigmffile_ret = sigmffiles[0] else: - return sigmffiles + sigmffile_ret = sigmffiles + if reader.collection: + return sigmffile_ret, reader.collection + else: + return sigmffile_ret def fromfile(filename, skip_checksum=False): From 68c68256d35e15d713bc44223eb5d88e55ccea07 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 12 May 2023 14:35:21 -0600 Subject: [PATCH 13/28] add README examples for archives with multiple recordings --- README.md | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/README.md b/README.md index 54cd4f2..090070b 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,104 @@ ci16_sigmffile = collection.get_SigMFFile(stream_name='example_ci16') cf32_sigmffile = collection.get_SigMFFile(stream_name='example_cf32') ``` +### Create and Read SigMF Archives with Multiple Recordings + +```python +import numpy as np +from sigmf.archivereader import SigMFArchiveReader + +from sigmf.sigmffile import (SigMFFile, + SigMFArchive, + SigMFCollection, + fromarchive, + fromfile) + + +# create data file +random_data1 = np.random.rand(128) +data1_path = "recording1.sigmf-data" +random_data1.tofile(data1_path) + +# create metadata +sigmf_file_1 = SigMFFile(name='recording1') +sigmf_file_1.set_global_field("core:datatype", "rf32_le") +sigmf_file_1.add_annotation(start_index=0, length=len(random_data1)) +sigmf_file_1.add_capture(start_index=0) +sigmf_file_1.set_data_file(data1_path) + +# create archive using SigMFArchive +archive1 = SigMFArchive(sigmffiles=sigmf_file_1, + path="single_recording_archive1.sigmf") + +# create archive using SigMFFile archive() +archive1_path = sigmf_file_1.archive(file_path="single_recording_archive2.sigmf") + +# create archive using tofile +sigmf_file_1.tofile(file_path="single_recording_archive3.sigmf", + toarchive=True) + +# multiple recordings +random_data2 = np.random.rand(128) +data2_path = "recording2.sigmf-data" +random_data2.tofile(data2_path) + +# create metadata +sigmf_file_2 = SigMFFile(name='recording2') +sigmf_file_2.set_global_field("core:datatype", "rf32_le") +sigmf_file_2.add_annotation(start_index=0, length=len(random_data2)) +sigmf_file_2.add_capture(start_index=0) +sigmf_file_2.set_data_file(data2_path) + +# create archive using SigMFArchive +sigmffiles = [sigmf_file_1, sigmf_file_2] +archive2 = SigMFArchive(sigmffiles=sigmffiles, + path="multi_recording_archive1.sigmf") + +# create archive with collection +sigmf_file_1.tofile("recording1.sigmf-meta") +sigmf_file_2.tofile("recording2.sigmf-meta") +metafiles = ["recording1.sigmf-meta", "recording2.sigmf-meta"] +collection = SigMFCollection(metafiles=metafiles) + +# create archive using SigMFArchive +archive3 = SigMFArchive(sigmffiles=sigmffiles, + collection=collection, + path="multi_recording_archive2.sigmf") + +# create archive using collection archive +archive3_path = collection.archive(file_path="multi_recording_archive3.sigmf") + +# create archive using collection tofile +collection.tofile(file_path="multi_recording_archive4.sigmf", toarchive=True) + +# read multirecording archives using archive reader +reader = SigMFArchiveReader("multi_recording_archive1.sigmf") +print(len(reader)) # equal to 2 for 2 sigmffiles + +# read multirecording archives using fromarchive +sigmffiles = fromarchive("multi_recording_archive1.sigmf") +print(len(sigmffiles)) # equal to 2 for 2 sigmffiles + +# read multirecording archives using fromfile +sigmffiles = fromfile("multi_recording_archive1.sigmf") +print(len(sigmffiles)) # equal to 2 for 2 sigmffiles + +# read multirecording archives using archive reader with collection +reader = SigMFArchiveReader("multi_recording_archive2.sigmf") +print(len(reader)) # equal to 2 for 2 sigmffiles +print(reader.collection) + +# read multirecording archives using fromarchive with collection +sigmffiles, collection = fromarchive("multi_recording_archive2.sigmf") +print(len(sigmffiles)) # equal to 2 for 2 sigmffiles +print(collection) + +# read multirecording archives using fromfile with collection +sigmffiles, collection = fromfile("multi_recording_archive2.sigmf") +print(len(sigmffiles)) # equal to 2 for 2 sigmffiles +print(collection) +``` + ### Load a SigMF Archive and slice its data without untaring it Since an *archive* is merely a tarball (uncompressed), and since there any many From 454dd3428005619d01c1c1dd0b146a95c7b8ebf7 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Mon, 15 May 2023 11:03:16 -0600 Subject: [PATCH 14/28] fix archive docstring, remove unneeded variables from archivereader --- sigmf/archive.py | 13 ++++++++----- sigmf/archivereader.py | 2 -- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index fb1e468..20453e7 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -26,7 +26,8 @@ class SigMFArchive(): - """Archive one or more `SigMFFile`s. + """Archive one or more `SigMFFile`s. A collection file can + optionally be included. A `.sigmf` file must include both valid metadata and data. If `self.data_file` is not set or the requested output file @@ -34,17 +35,19 @@ class SigMFArchive(): Parameters: - sigmffile -- An iterable of SigMFFile objects with valid metadata and - data_files + sigmffiles -- A single SigMFFIle or an iterable of SigMFFile objects with + valid metadata and data_files - path -- path to archive file to create. If file exists, overwrite. + collection -- An optional SigMFCollection. + + path -- Path to archive file to create. If file exists, overwrite. If `path` doesn't end in .sigmf, it will be appended. The `self.path` instance variable will be updated upon successful writing of the archive to point to the final archive path. - fileobj -- If `fileobj` is specified, it is used as an alternative to + fileobj -- If `fileobj` is specified, it is used as an alternative to a file object opened in binary mode for `path`. If `fileobj` is an open tarfile, it will be appended to. It is supposed to be at position 0. `fileobj` won't be closed. If diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index 7805790..b3fc296 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -91,8 +91,6 @@ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_bu size_bytes=data_offset_size[1], map_readonly=map_readonly) - self.ndim = sigmffile.ndim - self.shape = sigmffile.shape self.sigmffiles.append(sigmffile) data_offset_size = None json_contents = None From af9002d149a475b9d9482fb7d761fb6ef68b52f2 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Mon, 15 May 2023 11:09:54 -0600 Subject: [PATCH 15/28] simplify SigMFCollection archive tests --- tests/test_sigmffile.py | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index 3868198..3b6e7b9 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -328,39 +328,6 @@ def test_archive_collection(test_sigmffile, for input_sigmf_file in input_sigmf_files: assert input_sigmf_file in archive_reader.sigmffiles assert test_collection == archive_reader.collection - finally: - for sigmf_meta_file in sigmf_meta_files: - if os.path.exists(sigmf_meta_file): - os.remove(sigmf_meta_file) - for sigmf_file in input_sigmf_files: - filename = sigmf_file.name + SIGMF_DATASET_EXT - if os.path.exists(filename): - os.remove(filename) - - -def test_tofile_collection(test_sigmffile, - test_alternate_sigmffile, - test_alternate_sigmffile_2): - sigmf_meta_files = [ - test_sigmffile.name + SIGMF_METADATA_EXT, - test_alternate_sigmffile.name + SIGMF_METADATA_EXT, - test_alternate_sigmffile_2.name + SIGMF_METADATA_EXT - ] - input_sigmf_files = [test_sigmffile, - test_alternate_sigmffile, - test_alternate_sigmffile_2] - data = [TEST_FLOAT32_DATA_1, TEST_FLOAT32_DATA_2, TEST_FLOAT32_DATA_3] - try: - for sigmf_meta_file, sigmf_file, _data in zip(sigmf_meta_files, - input_sigmf_files, - data): - with open(sigmf_meta_file, mode="w") as sigmf_meta_fd: - sigmf_file.dump(sigmf_meta_fd) - sample_data = sigmf_file.read_samples(autoscale=False, - raw_components=True) - assert np.array_equal(sample_data, _data) - sample_data.tofile(sigmf_file.name + SIGMF_DATASET_EXT) - test_collection = sigmffile.SigMFCollection(sigmf_meta_files) with tempfile.NamedTemporaryFile(suffix=".sigmf") as tmpfile: test_collection.tofile(tmpfile.name, toarchive=True) archive_reader = SigMFArchiveReader(path=tmpfile.name) From f1d108b31e91482cf164561b77e46b5519e1c974 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Mon, 15 May 2023 13:20:32 -0600 Subject: [PATCH 16/28] organize SigMFFile constructor doc string --- sigmf/sigmffile.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 925d3d7..f096f67 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -161,6 +161,13 @@ def __init__(self, Parameters ---------- + name: Name used for directory and filenames if archived. + For example, given `name=archive1`, then passing this + sigmffile to SigMFArchive will add the following files + to the archive: + - archive1/ + - archive1.sigmf-meta + - archive1.sigmf-data metadata: str or dict, optional Metadata for associated dataset. data_file: str, optional @@ -171,13 +178,6 @@ def __init__(self, When True will skip calculating hash on data_file (if present) to check against metadata. map_readonly: bool, default True Indicates whether assignments on the numpy.memmap are allowed. - name: Name used for directory and filenames if archived. - For example, given `name=archive1`, then passing this - sigmffile to SigMFArchive will add the following files - to the archive: - - archive1/ - - archive1.sigmf-meta - - archive1.sigmf-data ''' super(SigMFFile, self).__init__() self.data_file = None From a631eb39db6d59be1bc65d283bdb39d5a1119f18 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 26 May 2023 09:13:21 -0600 Subject: [PATCH 17/28] clarify different ways to do the same thing in README --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 090070b..01d022b 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,16 @@ cf32_sigmffile = collection.get_SigMFFile(stream_name='example_cf32') ### Create and Read SigMF Archives with Multiple Recordings +The below example shows different ways to create and read an archive. The +`SigMFArchive` class, the `SigMFFile.archive()` method, and the +`SigMFFile.tofile()` method can all be used to create an archive. Archives +with collections can be created using `SigMFArchive` class, +`SigMFCollection.archive()` method, and the `SigMFCollection.tofile()` method. + +There are also different ways to read an archive using `SigMFArchiveReader` +class, the `sigmffile.fromarchive()` method, and the `sigmffile.fromfile()` +method. + ```python import numpy as np from sigmf.archivereader import SigMFArchiveReader From 74a7b86a9c568e61c2e7d54141f845a9a65d634d Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 26 May 2023 09:18:11 -0600 Subject: [PATCH 18/28] fix typo --- sigmf/archive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index 20453e7..247180f 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -35,7 +35,7 @@ class SigMFArchive(): Parameters: - sigmffiles -- A single SigMFFIle or an iterable of SigMFFile objects with + sigmffiles -- A single SigMFFile or an iterable of SigMFFile objects with valid metadata and data_files collection -- An optional SigMFCollection. From 93ab02b4aa5a5cd7b79d84654e8fdf75826764af Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Tue, 30 May 2023 11:35:08 -0600 Subject: [PATCH 19/28] add support for passing SigMFFile objects to SigMFCollection to improve usability with SigMFArchiveReader --- README.md | 1 + sigmf/archivereader.py | 9 +---- sigmf/sigmffile.py | 78 +++++++++++++++++++++++++++---------- tests/test_archivereader.py | 25 ++++++++---- tests/test_sigmffile.py | 49 ++++++++++++++++++++++- 5 files changed, 125 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 01d022b..59e255b 100644 --- a/README.md +++ b/README.md @@ -276,6 +276,7 @@ print(len(sigmffiles)) # equal to 2 for 2 sigmffiles reader = SigMFArchiveReader("multi_recording_archive2.sigmf") print(len(reader)) # equal to 2 for 2 sigmffiles print(reader.collection) +print(len(reader.collection.sigmffiles)) # get SigMFFiles from collection # read multirecording archives using fromarchive with collection sigmffiles, collection = fromarchive("multi_recording_archive2.sigmf") diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index b3fc296..8842cd6 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -96,13 +96,8 @@ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_bu json_contents = None sigmffile_name = None if collection_metadata: - # Currently the SigMFCollection class does not support getting - # SigMFFiles (SigMFCollection.get_SigMFFile()) when created - # here in SigMFArchiveReader. This is because the SigMF - # metadata files are not extracted from the tarfile to the - # file system. - self.collection = SigMFCollection(metadata=collection_metadata, - skip_checksums=True) + self.collection = SigMFCollection(metafiles=self.sigmffiles, + metadata=collection_metadata) else: self.collection = None diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 3e70414..d28b473 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -8,6 +8,7 @@ from collections import OrderedDict import codecs +from io import BytesIO import json import tarfile import tempfile @@ -98,7 +99,7 @@ def dumps(self, pretty=True): self.ordered_metadata(), indent=4 if pretty else None, separators=(',', ': ') if pretty else None, - ) + ) + "\n" class SigMFFile(SigMFMetafile): START_INDEX_KEY = "core:sample_start" @@ -697,9 +698,10 @@ def __init__(self, metafiles=None, metadata=None, skip_checksums=False): Parameters: - metafiles -- A list of SigMF metadata filenames objects comprising the Collection, - there must be at least one file. If the files do not exist, this will - raise a SigMFFileError. + metafiles -- A list of SigMF metadata filenames objects or SigMFFile + objects comprising the Collection, there must be at least + one file. If the files do not exist, this will raise a + SigMFFileError. metadata -- collection metadata to use, if not provided this will populate a minimal set of default metadata. The core:streams field will be @@ -751,23 +753,58 @@ def verify_stream_hashes(self): if path.isfile(metafile_name): new_hash = sigmf_hash.calculate_sha512(filename=metafile_name) if old_hash != new_hash: - raise SigMFFileError(f'Calculated file hash for {metafile_name} does not match collection metadata.') + raise SigMFFileError('Calculated file hash for metadata ' + f'file {metafile_name} does not ' + 'match collection metadata.') + sigmffile = [x for x in self.sigmffiles + if x.name == stream.get('name')][0] + sigmffile_meta = sigmffile.dumps() + sigmffile_bytes = sigmffile_meta.encode('utf-8') + size_of_meta = len(sigmffile_bytes) + sigmffile_hash = sigmf_hash.calculate_sha512( + fileobj=BytesIO(sigmffile_bytes), + offset_and_size=(0, size_of_meta) + ) + if old_hash != sigmffile_hash: + raise SigMFFileError('Calculated file hash for SigMFFile ' + f'{sigmffile.name} does not match ' + 'collection metadata.') def set_streams(self, metafiles): ''' configures the collection `core:streams` field from the specified list of metafiles ''' - self.metafiles = metafiles streams = [] - for metafile in self.metafiles: - if metafile.endswith('.sigmf-meta') and path.isfile(metafile): - stream = { - "name": get_sigmf_filenames(metafile)['base_fn'], - "hash": sigmf_hash.calculate_sha512(filename=metafile) - } - streams.append(stream) - else: - raise SigMFFileError(f'Specifed stream file {metafile} is not a valid SigMF Metadata file') + sigmffile_names = [] + self.sigmffiles = [] + if isinstance(metafiles[0], SigMFFile): + for sigmffile in metafiles: + sigmffile_names.append(sigmffile.name + SIGMF_METADATA_EXT) + sigmffile_meta = sigmffile.dumps() + sigmffile_bytes = sigmffile_meta.encode('utf-8') + size_of_meta = len(sigmffile_bytes) + streams.append({ + "name": sigmffile.name, + "hash": sigmf_hash.calculate_sha512( + fileobj=BytesIO(sigmffile_bytes), + offset_and_size=(0, size_of_meta)) + }) + self.sigmffiles.append(sigmffile) + self.metafiles = sigmffile_names + else: + self.metafiles = metafiles + for metafile in self.metafiles: + if (metafile.endswith(SIGMF_METADATA_EXT) and + path.isfile(metafile)): + stream = { + "name": get_sigmf_filenames(metafile)['base_fn'], + "hash": sigmf_hash.calculate_sha512(filename=metafile) + } + streams.append(stream) + else: + raise SigMFFileError(f'Specifed stream file {metafile} is' + ' not a valid SigMF Metadata file') + self.sigmffiles.append(fromfile(metafile, skip_checksum=self.skip_checksums)) self.set_collection_field(self.STREAMS_KEY, streams) def get_stream_names(self): @@ -843,15 +880,14 @@ def get_SigMFFile(self, stream_name=None, stream_index=None): ''' Returns the SigMFFile instance of the specified stream if it exists ''' - metafile = None + sigmffile = None if stream_name is not None: - if stream_name in self.get_stream_names(): - metafile = stream_name + '.sigmf_meta' + sigmffile = [x for x in self.sigmffiles + if x.name == stream_name][0] if stream_index is not None and stream_index < self.__len__(): - metafile = self.get_stream_names()[stream_index] + '.sigmf_meta' + sigmffile = self.sigmffiles[stream_index] + return sigmffile - if metafile is not None: - return fromfile(metafile, skip_checksum=self.skip_checksums) def dtype_info(datatype): """ diff --git a/tests/test_archivereader.py b/tests/test_archivereader.py index d3139ea..9924dcf 100644 --- a/tests/test_archivereader.py +++ b/tests/test_archivereader.py @@ -73,35 +73,37 @@ def test_extract_multi_recording(test_sigmffile, test_alternate_sigmffile): def test_extract_single_recording_with_collection(test_sigmffile): - with tempfile.TemporaryDirectory() as tmpdir: - meta_filepath = os.path.join(tmpdir, - test_sigmffile.name + SIGMF_METADATA_EXT) + try: + meta_filepath = test_sigmffile.name + SIGMF_METADATA_EXT with open(meta_filepath, "w") as meta_fd: test_sigmffile.dump(meta_fd) collection = SigMFCollection(metafiles=[meta_filepath]) - archive_path = os.path.join(tmpdir, "test_archive.sigmf") + archive_path = "test_archive.sigmf" arch = SigMFArchive(test_sigmffile, collection, path=archive_path) reader = SigMFArchiveReader(arch.path) assert len(reader) == 1 actual_sigmffile = reader[0] assert test_sigmffile == actual_sigmffile assert collection == reader.collection + finally: + if os.path.exists(meta_filepath): + os.remove(meta_filepath) + if os.path.exists(archive_path): + os.remove(archive_path) def test_extract_multi_recording_with_collection(test_sigmffile, test_alternate_sigmffile): - with tempfile.TemporaryDirectory() as tmpdir: + try: meta1_filepath = test_sigmffile.name + SIGMF_METADATA_EXT - meta1_filepath = os.path.join(tmpdir, meta1_filepath) with open(meta1_filepath, "w") as meta_fd: test_sigmffile.dump(meta_fd) meta2_filepath = test_alternate_sigmffile.name + SIGMF_METADATA_EXT - meta2_filepath = os.path.join(tmpdir, meta2_filepath) with open(meta2_filepath, "w") as meta_fd: test_alternate_sigmffile.dump(meta_fd) collection = SigMFCollection(metafiles=[meta1_filepath, meta2_filepath]) - archive_path = os.path.join(tmpdir, "test_archive.sigmf") + archive_path = "test_archive.sigmf" input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] arch = SigMFArchive(input_sigmffiles, collection, path=archive_path) reader = SigMFArchiveReader(arch.path) @@ -109,3 +111,10 @@ def test_extract_multi_recording_with_collection(test_sigmffile, for actual_sigmffile in reader: assert actual_sigmffile in input_sigmffiles assert collection == reader.collection + finally: + if os.path.exists(meta1_filepath): + os.remove(meta1_filepath) + if os.path.exists(meta2_filepath): + os.remove(meta2_filepath) + if os.path.exists(archive_path): + os.remove(archive_path) diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index 3b6e7b9..5838f98 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -27,7 +27,7 @@ from sigmf import sigmffile, utils from sigmf.archivereader import SigMFArchiveReader -from sigmf.sigmffile import SigMFFile, fromarchive +from sigmf.sigmffile import SigMFCollection, SigMFFile, fromarchive from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SigMFArchive from .testdata import * @@ -334,6 +334,8 @@ def test_archive_collection(test_sigmffile, for input_sigmf_file in input_sigmf_files: assert input_sigmf_file in archive_reader.sigmffiles assert test_collection == archive_reader.collection + for input_sigmf_file in input_sigmf_files: + assert input_sigmf_file in test_collection.sigmffiles finally: for sigmf_meta_file in sigmf_meta_files: if os.path.exists(sigmf_meta_file): @@ -342,3 +344,48 @@ def test_archive_collection(test_sigmffile, filename = sigmf_file.name + SIGMF_DATASET_EXT if os.path.exists(filename): os.remove(filename) + + +def test_create_collection_with_sigmffiles(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): + input_sigmf_files = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + collection = SigMFCollection(metafiles=input_sigmf_files) + output_stream_names = collection.get_stream_names() + output_sigmf_files_by_name = [] + for stream_name in output_stream_names: + output_sigmf_file = collection.get_SigMFFile(stream_name=stream_name) + output_sigmf_files_by_name.append(output_sigmf_file) + output_sigmf_files_by_index = [] + for i in range(len(collection)): + output_sigmf_file = collection.get_SigMFFile(stream_index=i) + output_sigmf_files_by_index.append(output_sigmf_file) + for input_sigmf in input_sigmf_files: + assert input_sigmf.name in output_stream_names + assert input_sigmf in output_sigmf_files_by_name + assert input_sigmf in output_sigmf_files_by_index + + +def test_collection_set_sigmffiles(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): + input_sigmf_files = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + collection = SigMFCollection(metafiles=[test_sigmffile]) + collection.set_streams(input_sigmf_files) + output_stream_names = collection.get_stream_names() + output_sigmf_files_by_name = [] + for stream_name in output_stream_names: + output_sigmf_file = collection.get_SigMFFile(stream_name=stream_name) + output_sigmf_files_by_name.append(output_sigmf_file) + output_sigmf_files_by_index = [] + for i in range(len(collection)): + output_sigmf_file = collection.get_SigMFFile(stream_index=i) + output_sigmf_files_by_index.append(output_sigmf_file) + for input_sigmf in input_sigmf_files: + assert input_sigmf.name in output_stream_names + assert input_sigmf in output_sigmf_files_by_name + assert input_sigmf in output_sigmf_files_by_index From 5376ece515173066b274e2551dae5ddd74710e35 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Thu, 1 Jun 2023 08:40:34 -0600 Subject: [PATCH 20/28] fix SigMFCollection docstring --- sigmf/sigmffile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index d28b473..d7b3fc2 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -698,7 +698,7 @@ def __init__(self, metafiles=None, metadata=None, skip_checksums=False): Parameters: - metafiles -- A list of SigMF metadata filenames objects or SigMFFile + metafiles -- A list of SigMF metadata filenames or SigMFFile objects comprising the Collection, there must be at least one file. If the files do not exist, this will raise a SigMFFileError. From 46e7d8f3d3e7875664e419fcceb963ee639dc666 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Thu, 1 Jun 2023 10:04:25 -0600 Subject: [PATCH 21/28] SigMFCollection set_streams() will check type for each element of metafiles input --- sigmf/sigmffile.py | 36 ++++++++++++++++++++------------- tests/test_sigmffile.py | 45 ++++++++++++++++++++++++----------------- 2 files changed, 49 insertions(+), 32 deletions(-) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index d7b3fc2..ff0794a 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -10,6 +10,7 @@ import codecs from io import BytesIO import json +import os import tarfile import tempfile from os import path @@ -19,7 +20,7 @@ from . import __version__, schema, sigmf_hash, validate from .archive import SigMFArchive, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SIGMF_ARCHIVE_EXT, SIGMF_COLLECTION_EXT from .utils import dict_merge -from .error import SigMFFileError, SigMFAccessError +from .error import SigMFError, SigMFFileError, SigMFAccessError class SigMFMetafile(): VALID_KEYS = {} @@ -772,13 +773,15 @@ def verify_stream_hashes(self): def set_streams(self, metafiles): ''' - configures the collection `core:streams` field from the specified list of metafiles + configures the collection `core:streams` field from the specified list + of metafiles or SigMFFiles ''' streams = [] sigmffile_names = [] self.sigmffiles = [] - if isinstance(metafiles[0], SigMFFile): - for sigmffile in metafiles: + + for sigmffile in metafiles: + if isinstance(sigmffile, SigMFFile): sigmffile_names.append(sigmffile.name + SIGMF_METADATA_EXT) sigmffile_meta = sigmffile.dumps() sigmffile_bytes = sigmffile_meta.encode('utf-8') @@ -790,21 +793,26 @@ def set_streams(self, metafiles): offset_and_size=(0, size_of_meta)) }) self.sigmffiles.append(sigmffile) - self.metafiles = sigmffile_names - else: - self.metafiles = metafiles - for metafile in self.metafiles: - if (metafile.endswith(SIGMF_METADATA_EXT) and - path.isfile(metafile)): + elif (isinstance(sigmffile, str) or + isinstance(sigmffile, os.PathLike)): + sigmffile_names.append(str(sigmffile)) + if (str(sigmffile).endswith(SIGMF_METADATA_EXT) and + path.isfile(sigmffile)): stream = { - "name": get_sigmf_filenames(metafile)['base_fn'], - "hash": sigmf_hash.calculate_sha512(filename=metafile) + "name": get_sigmf_filenames(sigmffile)['base_fn'], + "hash": sigmf_hash.calculate_sha512(filename=sigmffile) } streams.append(stream) else: - raise SigMFFileError(f'Specifed stream file {metafile} is' + raise SigMFFileError(f'Specifed stream file {sigmffile} is' ' not a valid SigMF Metadata file') - self.sigmffiles.append(fromfile(metafile, skip_checksum=self.skip_checksums)) + self.sigmffiles.append( + fromfile(sigmffile, skip_checksum=self.skip_checksums) + ) + else: + raise SigMFError("Unknown type, set_streams() input must be" + " list of metafiles or SigMFFiles") + self.metafiles = sigmffile_names self.set_collection_field(self.STREAMS_KEY, streams) def get_stream_names(self): diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index 5838f98..4069052 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -371,21 +371,30 @@ def test_create_collection_with_sigmffiles(test_sigmffile, def test_collection_set_sigmffiles(test_sigmffile, test_alternate_sigmffile, test_alternate_sigmffile_2): - input_sigmf_files = [test_sigmffile, - test_alternate_sigmffile, - test_alternate_sigmffile_2] - collection = SigMFCollection(metafiles=[test_sigmffile]) - collection.set_streams(input_sigmf_files) - output_stream_names = collection.get_stream_names() - output_sigmf_files_by_name = [] - for stream_name in output_stream_names: - output_sigmf_file = collection.get_SigMFFile(stream_name=stream_name) - output_sigmf_files_by_name.append(output_sigmf_file) - output_sigmf_files_by_index = [] - for i in range(len(collection)): - output_sigmf_file = collection.get_SigMFFile(stream_index=i) - output_sigmf_files_by_index.append(output_sigmf_file) - for input_sigmf in input_sigmf_files: - assert input_sigmf.name in output_stream_names - assert input_sigmf in output_sigmf_files_by_name - assert input_sigmf in output_sigmf_files_by_index + try: + input_sigmf_files = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + third_sigmf_meta_filename = test_alternate_sigmffile_2.name + SIGMF_METADATA_EXT + streams_input = [test_sigmffile, test_alternate_sigmffile, third_sigmf_meta_filename] + with open(third_sigmf_meta_filename, "w") as out_f: + test_alternate_sigmffile_2.dump(out_f) + + collection = SigMFCollection(metafiles=[test_sigmffile]) + collection.set_streams(streams_input) + output_stream_names = collection.get_stream_names() + output_sigmf_files_by_name = [] + for stream_name in output_stream_names: + output_sigmf_file = collection.get_SigMFFile(stream_name=stream_name) + output_sigmf_files_by_name.append(output_sigmf_file) + output_sigmf_files_by_index = [] + for i in range(len(collection)): + output_sigmf_file = collection.get_SigMFFile(stream_index=i) + output_sigmf_files_by_index.append(output_sigmf_file) + for input_sigmf in input_sigmf_files: + assert input_sigmf.name in output_stream_names + assert input_sigmf in output_sigmf_files_by_name + assert input_sigmf in output_sigmf_files_by_index + finally: + if os.path.exists(third_sigmf_meta_filename): + os.remove(third_sigmf_meta_filename) \ No newline at end of file From 660ba82257c5f709292e1aecce7861f6a449d49a Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Thu, 1 Jun 2023 12:28:25 -0600 Subject: [PATCH 22/28] break up and simplify archive examples in README --- README.md | 116 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 60 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 59e255b..816d0a4 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,32 @@ handle.get_captures() # returns list of 'captures' dictionaries handle.get_annotations() # returns list of all annotations ``` +### Load a SigMF archive with multiple recordings +There are different ways to read an archive using `SigMFArchiveReader` +class, the `sigmffile.fromarchive()` method, and the `sigmffile.fromfile()` +method. + +```python +import numpy as np +from sigmf.archivereader import SigMFArchiveReader + +from sigmf.sigmffile import (fromarchive, + fromfile) +# read multirecording archives using fromarchive +sigmffiles = fromarchive("multi_recording_archive1.sigmf") +print(len(sigmffiles)) + +# read multirecording archives using fromfile +sigmffiles = fromfile("multi_recording_archive1.sigmf") +print(len(sigmffiles)) + +# read multirecording archives using archive reader with collection +reader = SigMFArchiveReader("multi_recording_archive2.sigmf") +print(len(reader)) +print(reader.collection) +print(len(reader.collection.sigmffiles)) # get SigMFFiles from collection +``` + ### Verify SigMF dataset integrity & compliance ```bash @@ -180,27 +206,15 @@ ci16_sigmffile = collection.get_SigMFFile(stream_name='example_ci16') cf32_sigmffile = collection.get_SigMFFile(stream_name='example_cf32') ``` -### Create and Read SigMF Archives with Multiple Recordings - -The below example shows different ways to create and read an archive. The -`SigMFArchive` class, the `SigMFFile.archive()` method, and the -`SigMFFile.tofile()` method can all be used to create an archive. Archives -with collections can be created using `SigMFArchive` class, -`SigMFCollection.archive()` method, and the `SigMFCollection.tofile()` method. - -There are also different ways to read an archive using `SigMFArchiveReader` -class, the `sigmffile.fromarchive()` method, and the `sigmffile.fromfile()` -method. +### Create a SigMF Archive +The `SigMFArchive` class, the `SigMFFile.archive()` method, and the +`SigMFFile.tofile()` method can all be used to create an archive. ```python import numpy as np -from sigmf.archivereader import SigMFArchiveReader from sigmf.sigmffile import (SigMFFile, - SigMFArchive, - SigMFCollection, - fromarchive, - fromfile) + SigMFArchive) # create data file @@ -225,68 +239,58 @@ archive1_path = sigmf_file_1.archive(file_path="single_recording_archive2.sigmf" # create archive using tofile sigmf_file_1.tofile(file_path="single_recording_archive3.sigmf", toarchive=True) +``` + +### Create SigMF Archives with Multiple Recordings +Archives with collections can be created using `SigMFArchive` class, +`SigMFCollection.archive()` method, and the `SigMFCollection.tofile()` method. + +```python +import numpy as np + +from sigmf.sigmffile import (SigMFFile, + SigMFArchive, + SigMFCollection) + + +# create data files +random_data1 = np.random.rand(128) +data1_path = "recording1.sigmf-data" +random_data1.tofile(data1_path) -# multiple recordings random_data2 = np.random.rand(128) data2_path = "recording2.sigmf-data" random_data2.tofile(data2_path) # create metadata +sigmf_file_1 = SigMFFile(name='recording1') +sigmf_file_1.set_global_field("core:datatype", "rf32_le") +sigmf_file_1.add_annotation(start_index=0, length=len(random_data1)) +sigmf_file_1.add_capture(start_index=0) +sigmf_file_1.set_data_file(data1_path) + sigmf_file_2 = SigMFFile(name='recording2') sigmf_file_2.set_global_field("core:datatype", "rf32_le") sigmf_file_2.add_annotation(start_index=0, length=len(random_data2)) sigmf_file_2.add_capture(start_index=0) sigmf_file_2.set_data_file(data2_path) -# create archive using SigMFArchive -sigmffiles = [sigmf_file_1, sigmf_file_2] -archive2 = SigMFArchive(sigmffiles=sigmffiles, - path="multi_recording_archive1.sigmf") - -# create archive with collection +# create collection sigmf_file_1.tofile("recording1.sigmf-meta") sigmf_file_2.tofile("recording2.sigmf-meta") metafiles = ["recording1.sigmf-meta", "recording2.sigmf-meta"] collection = SigMFCollection(metafiles=metafiles) -# create archive using SigMFArchive +# create archive using SigMFArchive without collection +sigmffiles = [sigmf_file_1, sigmf_file_2] archive3 = SigMFArchive(sigmffiles=sigmffiles, - collection=collection, - path="multi_recording_archive2.sigmf") + path="multi_recording_archive1.sigmf") # create archive using collection archive -archive3_path = collection.archive(file_path="multi_recording_archive3.sigmf") +archive3_path = collection.archive(file_path="multi_recording_archive2.sigmf") # create archive using collection tofile -collection.tofile(file_path="multi_recording_archive4.sigmf", toarchive=True) - -# read multirecording archives using archive reader -reader = SigMFArchiveReader("multi_recording_archive1.sigmf") -print(len(reader)) # equal to 2 for 2 sigmffiles - -# read multirecording archives using fromarchive -sigmffiles = fromarchive("multi_recording_archive1.sigmf") -print(len(sigmffiles)) # equal to 2 for 2 sigmffiles - -# read multirecording archives using fromfile -sigmffiles = fromfile("multi_recording_archive1.sigmf") -print(len(sigmffiles)) # equal to 2 for 2 sigmffiles - -# read multirecording archives using archive reader with collection -reader = SigMFArchiveReader("multi_recording_archive2.sigmf") -print(len(reader)) # equal to 2 for 2 sigmffiles -print(reader.collection) -print(len(reader.collection.sigmffiles)) # get SigMFFiles from collection - -# read multirecording archives using fromarchive with collection -sigmffiles, collection = fromarchive("multi_recording_archive2.sigmf") -print(len(sigmffiles)) # equal to 2 for 2 sigmffiles -print(collection) - -# read multirecording archives using fromfile with collection -sigmffiles, collection = fromfile("multi_recording_archive2.sigmf") -print(len(sigmffiles)) # equal to 2 for 2 sigmffiles -print(collection) +collection.tofile(file_path="multi_recording_archive3.sigmf", toarchive=True) ``` ### Load a SigMF Archive and slice its data without untaring it From e2919d868da470c598838cff39d3c3f36bda6061 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Thu, 1 Jun 2023 13:15:20 -0600 Subject: [PATCH 23/28] fix docstring, add ability to control pretty print JSON for archive --- sigmf/archive.py | 10 +++++++--- sigmf/sigmffile.py | 21 ++++++++++----------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index 247180f..7ca3f74 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -52,13 +52,17 @@ class SigMFArchive(): `fileobj` is an open tarfile, it will be appended to. It is supposed to be at position 0. `fileobj` won't be closed. If `fileobj` is given, `path` has no effect. + + pretty -- If True, pretty print JSON when creating the metadata and + collection files in the archive. Defaults to True. """ def __init__(self, sigmffiles: Union["sigmf.sigmffile.SigMFFile", Iterable["sigmf.sigmffile.SigMFFile"]], collection: "sigmf.sigmffile.SigMFCollection" = None, path: Union[str, os.PathLike] = None, - fileobj: BinaryIO = None): + fileobj: BinaryIO = None, + pretty=True): if (not path) and (not fileobj): raise SigMFFileError("'path' or 'fileobj' required for creating " @@ -105,7 +109,7 @@ def chmod(tarinfo): if collection: with tempfile.NamedTemporaryFile(mode="w") as tmpfile: - collection.dump(tmpfile, pretty=True) + collection.dump(tmpfile, pretty=pretty) tmpfile.flush() collection_filename = archive_name + SIGMF_COLLECTION_EXT sigmf_archive.add(tmpfile.name, @@ -120,7 +124,7 @@ def chmod(tarinfo): sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename) with open(sigmf_md_path, "w") as mdfile: - sigmffile.dump(mdfile, pretty=True) + sigmffile.dump(mdfile, pretty=pretty) shutil.copy(sigmffile.data_file, sigmf_data_path) sigmf_archive.add(tmpdir, arcname=sigmffile.name, filter=chmod) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index ff0794a..58af947 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -542,17 +542,16 @@ def validate(self): version = self.get_global_field(self.VERSION_KEY) validate.validate(self._metadata, self.get_schema()) - def archive(self, file_path=None, fileobj=None): + def archive(self, file_path=None, fileobj=None, pretty=True): """Dump contents to SigMF archive format. - `file_path` is passed to SigMFArchive `path` and `fileobj` is passed to - SigMFArchive `fileobj`. - + `file_path` is passed to SigMFArchive `path`, `fileobj` is passed to + SigMFArchive `fileobj`, and `pretty` is passed to SigMFArchive `pretty`. """ if file_path is None: file_path = self.name - archive = SigMFArchive(self, path=file_path, fileobj=fileobj) + archive = SigMFArchive(self, path=file_path, fileobj=fileobj, pretty=pretty) return archive.path def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): @@ -573,7 +572,7 @@ def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): self.validate() fns = get_sigmf_filenames(file_path) if toarchive: - self.archive(fns['archive_fn']) + self.archive(fns['archive_fn'], pretty=pretty) else: with open(fns['meta_fn'], 'w') as fp: self.dump(fp, pretty=pretty) @@ -706,7 +705,7 @@ def __init__(self, metafiles=None, metadata=None, skip_checksums=False): metadata -- collection metadata to use, if not provided this will populate a minimal set of default metadata. The core:streams field will be - regenerated automatically + regenerated automatically. Can be str or dict. """ super(SigMFCollection, self).__init__() self.skip_checksums = skip_checksums @@ -848,11 +847,11 @@ def get_collection_field(self, key, default=None): """ return self._metadata[self.COLLECTION_KEY].get(key, default) - def archive(self, file_path=None, fileobj=None): + def archive(self, file_path=None, fileobj=None, pretty=True): """Dump contents to SigMF archive format. `file_path` is passed to SigMFArchive `path` and `fileobj` is passed to - SigMFArchive `fileobj`. + SigMFArchive `fileobj`, and `pretty` is passed to SigMFArchive `pretty`. """ @@ -860,7 +859,7 @@ def archive(self, file_path=None, fileobj=None): for name in self.get_stream_names(): sigmffile = self.get_SigMFFile(name) sigmffiles.append(sigmffile) - archive = SigMFArchive(sigmffiles, self, file_path, fileobj) + archive = SigMFArchive(sigmffiles, self, file_path, fileobj, pretty=pretty) return archive.path def tofile(self, file_path, pretty=True, toarchive=False): @@ -879,7 +878,7 @@ def tofile(self, file_path, pretty=True, toarchive=False): ''' fns = get_sigmf_filenames(file_path) if toarchive: - self.archive(fns['archive_fn']) + self.archive(fns['archive_fn'], pretty=pretty) else: with open(fns['collection_fn'], 'w') as fp: self.dump(fp, pretty=pretty) From e4e1775687fe5f75c5739cca85bf6c13ea3f47ac Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 2 Jun 2023 10:00:42 -0600 Subject: [PATCH 24/28] update docstrings, formatting --- sigmf/sigmffile.py | 47 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 58af947..bb3e6d4 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -545,13 +545,30 @@ def validate(self): def archive(self, file_path=None, fileobj=None, pretty=True): """Dump contents to SigMF archive format. - `file_path` is passed to SigMFArchive `path`, `fileobj` is passed to - SigMFArchive `fileobj`, and `pretty` is passed to SigMFArchive `pretty`. + Keyword arguments: + file_path -- passed to SigMFArchive`path`. Path to archive file to + create. If file exists, overwrite. If `path` doesn't end + in .sigmf, it will be appended. If not given, `file_path` + will be set to self.name. (default None) + fileobj -- passed to SigMFArchive `fileobj`. If `fileobj` is + specified, it is used as an alternative to a file object + opened in binary mode for `file_path`. If `fileobj` is an + open tarfile, it will be appended to. It is supposed to + be at position 0. `fileobj` won't be closed. If `fileobj` + is given, `file_path` has no effect. (default None) + pretty -- passed to SigMFArchive `pretty`. If True, pretty print + JSON when creating the metadata and collection files in + the archive. (default True). + + Returns the path to the created archive. """ if file_path is None: file_path = self.name - archive = SigMFArchive(self, path=file_path, fileobj=fileobj, pretty=pretty) + archive = SigMFArchive(self, + path=file_path, + fileobj=fileobj, + pretty=pretty) return archive.path def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): @@ -850,16 +867,32 @@ def get_collection_field(self, key, default=None): def archive(self, file_path=None, fileobj=None, pretty=True): """Dump contents to SigMF archive format. - `file_path` is passed to SigMFArchive `path` and `fileobj` is passed to - SigMFArchive `fileobj`, and `pretty` is passed to SigMFArchive `pretty`. - + Keyword arguments: + file_path -- passed to SigMFArchive`path`. Path to archive file to + create. If file exists, overwrite. If `path` doesn't end + in .sigmf, it will be appended. (default None) + fileobj -- passed to SigMFArchive `fileobj`. If `fileobj` is + specified, it is used as an alternative to a file object + opened in binary mode for `file_path`. If `fileobj` is an + open tarfile, it will be appended to. It is supposed to + be at position 0. `fileobj` won't be closed. If `fileobj` + is given, `file_path` has no effect. (default None) + pretty -- passed to SigMFArchive `pretty`. If True, pretty print + JSON when creating the metadata and collection files in + the archive. (default True). + + Returns the path to the created archive. """ sigmffiles = [] for name in self.get_stream_names(): sigmffile = self.get_SigMFFile(name) sigmffiles.append(sigmffile) - archive = SigMFArchive(sigmffiles, self, file_path, fileobj, pretty=pretty) + archive = SigMFArchive(sigmffiles, + self, + file_path, + fileobj, + pretty=pretty) return archive.path def tofile(self, file_path, pretty=True, toarchive=False): From 3131683cbf22e534f069f390681981b05af2a3c2 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 2 Jun 2023 10:48:55 -0600 Subject: [PATCH 25/28] improve docstrings, remove duplicative test, add test for fromarchive with collection --- sigmf/archivereader.py | 5 +++++ sigmf/sigmffile.py | 9 +++++++-- tests/test_archive.py | 11 ----------- tests/test_sigmffile.py | 17 ++++++++++++++++- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index 8842cd6..eebb0e7 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -25,6 +25,11 @@ class can be used to iterate through multiple SigMFFiles in the archive. path -- path to archive file to access. If file does not exist, or if `path` doesn't end in .sigmf, SigMFFileError is raised. + + self.sigmffiles will contain the SigMFFile(s) (metadata/data) found in the + archive. + + self.collection will contain the SigMFCollection if found in the archive. """ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_buffer=None): self.path = path diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index bb3e6d4..f706791 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -1037,6 +1037,11 @@ def fromarchive(archive_path, dir=None): The `dir` parameter is no longer used as this function has been changed to access SigMF archives without extracting them. + + If the archive contains a single recording, a single SigMFFile object will + be returned. If the archive contains multiple recordings a list of + SigMFFile objects will be returned. If the archive contains a collection, + a tuple (SigMFFile(s), SigMFCollection) will be returned. """ from .archivereader import SigMFArchiveReader reader = SigMFArchiveReader(archive_path) @@ -1068,8 +1073,8 @@ def fromfile(filename, skip_checksum=False): Returns ------- - object - SigMFFile object with dataset & metadata or a SigMFCollection depending on the type of file + SigMFFile object(s) with dataset & metadata and/or a SigMFCollection + depending on the type of file or contents of archive. ''' fns = get_sigmf_filenames(filename) meta_fn = fns['meta_fn'] diff --git a/tests/test_archive.py b/tests/test_archive.py index 63bade7..17ad052 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -118,17 +118,6 @@ def test_tarfile_names_and_extensions(test_sigmffile): assert file2_ext in file_extensions -def test_sf_fromarchive_multirec(test_sigmffile, test_alternate_sigmffile): - """`SigMFFile.fromarchive` should return list of SigMFFiles.""" - with tempfile.NamedTemporaryFile(delete=True) as tf: - # Create a multi-recording archive - input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] - arch = SigMFArchive(input_sigmffiles, path=tf.name) - output_sigmf_files = sigmffile.fromarchive(archive_path=arch.path) - assert len(output_sigmf_files) == 2 - assert input_sigmffiles == output_sigmf_files - - def test_multirec_archive_into_fileobj(test_sigmffile, test_alternate_sigmffile): with tempfile.NamedTemporaryFile() as t: diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index 4069052..491a1fa 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -148,6 +148,21 @@ def test_fromarchive_multi_recording(test_sigmffile, assert list_of_sigmffiles[2] == test_alternate_sigmffile_2 +def test_fromarchive_multirec_with_collection(test_sigmffile, + test_alternate_sigmffile): + with tempfile.NamedTemporaryFile(delete=True) as tf: + # Create a multi-recording archive with collection + in_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + in_collection = SigMFCollection(in_sigmffiles) + arch = SigMFArchive(in_sigmffiles, + collection=in_collection, + path=tf.name) + out_sigmffiles, out_collection = fromarchive(archive_path=arch.path) + assert len(out_sigmffiles) == 2 + assert in_sigmffiles == out_sigmffiles + assert in_collection == out_collection + + def test_add_multiple_captures_and_annotations(): sigf = SigMFFile(name="test") for idx in range(3): @@ -397,4 +412,4 @@ def test_collection_set_sigmffiles(test_sigmffile, assert input_sigmf in output_sigmf_files_by_index finally: if os.path.exists(third_sigmf_meta_filename): - os.remove(third_sigmf_meta_filename) \ No newline at end of file + os.remove(third_sigmf_meta_filename) From 29827af18681b0682defa4719516372bf25f15c8 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Mon, 5 Jun 2023 13:30:08 -0600 Subject: [PATCH 26/28] fix error message --- sigmf/archivereader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index eebb0e7..1b499fd 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -46,7 +46,7 @@ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_bu tar_obj = tarfile.open(fileobj=archive_buffer, mode='r:') else: - raise ValueError('In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None') + raise ValueError('In sigmf.archivereader.__init__(), either `path` or `archive_buffer` must be not None') json_contents = None data_offset_size = None From b81289bdbb7cd44ca3569ce3f0ffee389c812458 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Tue, 6 Jun 2023 13:52:33 -0600 Subject: [PATCH 27/28] make archives work when using folders --- sigmf/archive.py | 7 +++- sigmf/archivereader.py | 5 ++- sigmf/sigmffile.py | 39 +++++++++++--------- tests/test_archive.py | 27 ++++++++++++++ tests/test_archivereader.py | 73 +++++++++++++++++++++++++++++++++++++ 5 files changed, 129 insertions(+), 22 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index 7ca3f74..251fe5a 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -118,9 +118,12 @@ def chmod(tarinfo): for sigmffile in self.sigmffiles: with tempfile.TemporaryDirectory() as tmpdir: - sigmf_md_filename = sigmffile.name + SIGMF_METADATA_EXT + sigmffile_name = sigmffile.name + if os.sep in sigmffile.name: + _, sigmffile_name = os.path.split(sigmffile.name) + sigmf_md_filename = sigmffile_name + SIGMF_METADATA_EXT sigmf_md_path = os.path.join(tmpdir, sigmf_md_filename) - sigmf_data_filename = sigmffile.name + SIGMF_DATASET_EXT + sigmf_data_filename = sigmffile_name + SIGMF_DATASET_EXT sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename) with open(sigmf_md_path, "w") as mdfile: diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index 1b499fd..19be77f 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -71,8 +71,9 @@ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_bu with tar_obj.extractfile(memb) as memb_fid: json_contents = memb_fid.read() - _, sigmffile_name = os.path.split(memb.name) - sigmffile_name, _ = os.path.splitext(sigmffile_name) + # recording name is the path of folder + # containing data/metadata + sigmffile_name, _ = os.path.split(memb.name) elif memb.name.endswith(SIGMF_DATASET_EXT): data_offset_size = memb.offset_data, memb.size data_found = True diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index f706791..a4a894b 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -726,6 +726,7 @@ def __init__(self, metafiles=None, metadata=None, skip_checksums=False): """ super(SigMFCollection, self).__init__() self.skip_checksums = skip_checksums + self.sigmffiles = [] if metadata is None: self._metadata = {self.COLLECTION_KEY:{}} @@ -773,19 +774,20 @@ def verify_stream_hashes(self): raise SigMFFileError('Calculated file hash for metadata ' f'file {metafile_name} does not ' 'match collection metadata.') - sigmffile = [x for x in self.sigmffiles - if x.name == stream.get('name')][0] - sigmffile_meta = sigmffile.dumps() - sigmffile_bytes = sigmffile_meta.encode('utf-8') - size_of_meta = len(sigmffile_bytes) - sigmffile_hash = sigmf_hash.calculate_sha512( - fileobj=BytesIO(sigmffile_bytes), - offset_and_size=(0, size_of_meta) - ) - if old_hash != sigmffile_hash: - raise SigMFFileError('Calculated file hash for SigMFFile ' - f'{sigmffile.name} does not match ' - 'collection metadata.') + if self.sigmffiles: + sigmffile = [x for x in self.sigmffiles + if x.name == stream.get('name')][0] + sigmffile_meta = sigmffile.dumps() + sigmffile_bytes = sigmffile_meta.encode('utf-8') + size_of_meta = len(sigmffile_bytes) + sigmffile_hash = sigmf_hash.calculate_sha512( + fileobj=BytesIO(sigmffile_bytes), + offset_and_size=(0, size_of_meta) + ) + if old_hash != sigmffile_hash: + raise SigMFFileError('Calculated file hash for SigMFFile ' + f'{sigmffile.name} does not match ' + 'collection metadata.') def set_streams(self, metafiles): ''' @@ -921,11 +923,12 @@ def get_SigMFFile(self, stream_name=None, stream_index=None): Returns the SigMFFile instance of the specified stream if it exists ''' sigmffile = None - if stream_name is not None: - sigmffile = [x for x in self.sigmffiles - if x.name == stream_name][0] - if stream_index is not None and stream_index < self.__len__(): - sigmffile = self.sigmffiles[stream_index] + if self.sigmffiles: + if stream_name is not None: + sigmffile = [x for x in self.sigmffiles + if x.name == stream_name][0] + if stream_index is not None and stream_index < self.__len__(): + sigmffile = self.sigmffiles[stream_index] return sigmffile diff --git a/tests/test_archive.py b/tests/test_archive.py index 17ad052..e6b48a2 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -118,6 +118,33 @@ def test_tarfile_names_and_extensions(test_sigmffile): assert file2_ext in file_extensions +def test_tarfile_names_and_extensions_with_paths(test_sigmffile): + with tempfile.NamedTemporaryFile() as temp: + test_sigmffile.name = os.path.join("test_folder", "test") + sigmf_tarfile = create_test_archive(test_sigmffile, temp) + basedir, file1, file2 = sigmf_tarfile.getmembers() + sigmffile_name = basedir.name + assert sigmffile_name == test_sigmffile.name + archive_name = sigmf_tarfile.name + assert archive_name == temp.name + path.split(temp.name)[-1] + file_extensions = {SIGMF_DATASET_EXT, SIGMF_METADATA_EXT} + + file1_name, file1_ext = path.splitext(file1.name) + assert file1_ext in file_extensions + # name of recording should match folder containing sigmf metadata/data + assert path.split(file1_name)[0] == test_sigmffile.name + assert path.split(file1_name)[-1] == path.basename(test_sigmffile.name) + + file_extensions.remove(file1_ext) + + file2_name, file2_ext = path.splitext(file2.name) + # name of recording should match folder containing sigmf metadata/data + assert path.split(file2_name)[0] == test_sigmffile.name + assert path.split(file2_name)[-1] == path.basename(test_sigmffile.name) + assert file2_ext in file_extensions + + def test_multirec_archive_into_fileobj(test_sigmffile, test_alternate_sigmffile): with tempfile.NamedTemporaryFile() as t: diff --git a/tests/test_archivereader.py b/tests/test_archivereader.py index 9924dcf..04d682b 100644 --- a/tests/test_archivereader.py +++ b/tests/test_archivereader.py @@ -1,4 +1,5 @@ import os +import shutil import tempfile import numpy as np @@ -118,3 +119,75 @@ def test_extract_multi_recording_with_collection(test_sigmffile, os.remove(meta2_filepath) if os.path.exists(archive_path): os.remove(archive_path) + + +def test_archivereader_different_folder(test_sigmffile, + test_alternate_sigmffile): + try: + os.makedirs("folder1", exist_ok=True) + test_sigmffile.name = os.path.join("folder1", "test1") + os.makedirs("folder2", exist_ok=True) + test_alternate_sigmffile.name = os.path.join("folder2", "test2") + meta1_filepath = test_sigmffile.name + SIGMF_METADATA_EXT + with open(meta1_filepath, "w") as meta_fd: + test_sigmffile.dump(meta_fd) + meta2_filepath = test_alternate_sigmffile.name + SIGMF_METADATA_EXT + with open(meta2_filepath, "w") as meta_fd: + test_alternate_sigmffile.dump(meta_fd) + collection = SigMFCollection(metafiles=[meta1_filepath, + meta2_filepath]) + os.makedirs("archive_folder", exist_ok=True) + archive_path = os.path.join("archive_folder", "test_archive.sigmf") + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, collection, path=archive_path) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 2 # number of SigMFFiles + for actual_sigmffile in reader: + assert actual_sigmffile in input_sigmffiles + assert collection == reader.collection + finally: + if os.path.exists(meta1_filepath): + os.remove(meta1_filepath) + if os.path.exists(meta2_filepath): + os.remove(meta2_filepath) + if os.path.exists(archive_path): + os.remove(archive_path) + if os.path.exists("folder1"): + shutil.rmtree("folder1") + if os.path.exists("folder2"): + shutil.rmtree("folder2") + if os.path.exists("archive_folder"): + shutil.rmtree("archive_folder") + + +def test_archivereader_same_folder(test_sigmffile, + test_alternate_sigmffile): + try: + os.makedirs("folder1", exist_ok=True) + test_sigmffile.name = os.path.join("folder1", "test1") + test_alternate_sigmffile.name = os.path.join("folder1", "test2") + meta1_filepath = test_sigmffile.name + SIGMF_METADATA_EXT + with open(meta1_filepath, "w") as meta_fd: + test_sigmffile.dump(meta_fd) + meta2_filepath = test_alternate_sigmffile.name + SIGMF_METADATA_EXT + with open(meta2_filepath, "w") as meta_fd: + test_alternate_sigmffile.dump(meta_fd) + collection = SigMFCollection(metafiles=[meta1_filepath, + meta2_filepath]) + archive_path = os.path.join("folder1", "test_archive.sigmf") + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, collection, path=archive_path) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 2 # number of SigMFFiles + for actual_sigmffile in reader: + assert actual_sigmffile in input_sigmffiles + assert collection == reader.collection + finally: + if os.path.exists(meta1_filepath): + os.remove(meta1_filepath) + if os.path.exists(meta2_filepath): + os.remove(meta2_filepath) + if os.path.exists(archive_path): + os.remove(archive_path) + if os.path.exists("folder1"): + shutil.rmtree("folder1") From 15ca451e4dd2b7b643e2e4bbdd756d9a378502da Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Thu, 8 Jun 2023 10:46:57 -0600 Subject: [PATCH 28/28] folders in archives are no longer created by default to maintain consistency with collection stream names, also adding files directly to tar instead of using temp folder --- sigmf/archive.py | 50 +++++++++++++++++++++++++++++------------- sigmf/archivereader.py | 4 +--- tests/test_archive.py | 35 ++++++++++++----------------- 3 files changed, 50 insertions(+), 39 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index 251fe5a..5176c9d 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -7,10 +7,11 @@ """Create and extract SigMF archives.""" import collections +from io import BytesIO import os -import shutil import tarfile import tempfile +import time from typing import BinaryIO, Iterable, Union import sigmf @@ -117,20 +118,23 @@ def chmod(tarinfo): filter=chmod) for sigmffile in self.sigmffiles: - with tempfile.TemporaryDirectory() as tmpdir: - sigmffile_name = sigmffile.name - if os.sep in sigmffile.name: - _, sigmffile_name = os.path.split(sigmffile.name) - sigmf_md_filename = sigmffile_name + SIGMF_METADATA_EXT - sigmf_md_path = os.path.join(tmpdir, sigmf_md_filename) - sigmf_data_filename = sigmffile_name + SIGMF_DATASET_EXT - sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename) - - with open(sigmf_md_path, "w") as mdfile: - sigmffile.dump(mdfile, pretty=pretty) - - shutil.copy(sigmffile.data_file, sigmf_data_path) - sigmf_archive.add(tmpdir, arcname=sigmffile.name, filter=chmod) + if os.path.sep in sigmffile.name: + parent, _ = os.path.split(sigmffile.name) + self._create_parent_dirs(sigmf_archive, parent, chmod) + sf_md_filename = sigmffile.name + SIGMF_METADATA_EXT + sf_data_filename = sigmffile.name + SIGMF_DATASET_EXT + metadata = sigmffile.dumps(pretty=pretty) + metadata_tarinfo = tarfile.TarInfo(sf_md_filename) + metadata_tarinfo.size = len(metadata) + metadata_tarinfo.mtime = time.time() + metadata_tarinfo = chmod(metadata_tarinfo) + metadata_buffer = BytesIO(metadata.encode("utf-8")) + sigmf_archive.addfile(metadata_tarinfo, fileobj=metadata_buffer) + data_tarinfo = sigmf_archive.gettarinfo(name=sigmffile.data_file, + arcname=sf_data_filename) + data_tarinfo = chmod(data_tarinfo) + with open(sigmffile.data_file, "rb") as data_file: + sigmf_archive.addfile(data_tarinfo, fileobj=data_file) sigmf_archive.close() if not fileobj: @@ -140,6 +144,22 @@ def chmod(tarinfo): self.path = sigmf_archive.name + def _create_parent_dirs(self, _tarfile, sigmffile_name, set_permission): + path_components = sigmffile_name.split(os.path.sep) + current_path = "" + for path in path_components: + current_path = os.path.join(current_path, path) + path_found = False + for member in _tarfile.getmembers(): + if member.name == current_path: + path_found = True + break + if not path_found: + tarinfo = tarfile.TarInfo(current_path) + tarinfo.type = tarfile.DIRTYPE + tarinfo = set_permission(tarinfo) + _tarfile.addfile(tarinfo) + def _check_input(self): self._ensure_path_has_correct_extension() for sigmffile in self.sigmffiles: diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index 19be77f..8356fe2 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -71,9 +71,7 @@ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_bu with tar_obj.extractfile(memb) as memb_fid: json_contents = memb_fid.read() - # recording name is the path of folder - # containing data/metadata - sigmffile_name, _ = os.path.split(memb.name) + sigmffile_name, _ = os.path.splitext(memb.name) elif memb.name.endswith(SIGMF_DATASET_EXT): data_offset_size = memb.offset_data, memb.size data_found = True diff --git a/tests/test_archive.py b/tests/test_archive.py index e6b48a2..0aaa6e6 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -55,8 +55,7 @@ def test_name_used_in_fileobj(test_sigmffile): sigmf_archive = test_sigmffile.archive(file_path="testarchive", fileobj=temp) sigmf_tarfile = tarfile.open(sigmf_archive, mode="r") - basedir, file1, file2 = sigmf_tarfile.getmembers() - assert basedir.name == test_sigmffile.name + file1, file2 = sigmf_tarfile.getmembers() assert sigmf_tarfile.name == temp.name def filename(tarinfo): @@ -90,8 +89,7 @@ def test_unwritable_name_throws_fileerror(test_sigmffile): def test_tarfile_layout(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) - basedir, file1, file2 = sigmf_tarfile.getmembers() - assert tarfile.TarInfo.isdir(basedir) + file1, file2 = sigmf_tarfile.getmembers() assert tarfile.TarInfo.isfile(file1) assert tarfile.TarInfo.isfile(file2) @@ -99,9 +97,7 @@ def test_tarfile_layout(test_sigmffile): def test_tarfile_names_and_extensions(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) - basedir, file1, file2 = sigmf_tarfile.getmembers() - sigmffile_name = basedir.name - assert sigmffile_name == test_sigmffile.name + file1, file2 = sigmf_tarfile.getmembers() archive_name = sigmf_tarfile.name assert archive_name == temp.name path.split(temp.name)[-1] @@ -123,8 +119,7 @@ def test_tarfile_names_and_extensions_with_paths(test_sigmffile): test_sigmffile.name = os.path.join("test_folder", "test") sigmf_tarfile = create_test_archive(test_sigmffile, temp) basedir, file1, file2 = sigmf_tarfile.getmembers() - sigmffile_name = basedir.name - assert sigmffile_name == test_sigmffile.name + assert "test_folder" == basedir.name archive_name = sigmf_tarfile.name assert archive_name == temp.name path.split(temp.name)[-1] @@ -132,16 +127,12 @@ def test_tarfile_names_and_extensions_with_paths(test_sigmffile): file1_name, file1_ext = path.splitext(file1.name) assert file1_ext in file_extensions - # name of recording should match folder containing sigmf metadata/data - assert path.split(file1_name)[0] == test_sigmffile.name - assert path.split(file1_name)[-1] == path.basename(test_sigmffile.name) + assert file1_name == test_sigmffile.name file_extensions.remove(file1_ext) file2_name, file2_ext = path.splitext(file2.name) - # name of recording should match folder containing sigmf metadata/data - assert path.split(file2_name)[0] == test_sigmffile.name - assert path.split(file2_name)[-1] == path.basename(test_sigmffile.name) + assert file2_name == test_sigmffile.name assert file2_ext in file_extensions @@ -153,11 +144,13 @@ def test_multirec_archive_into_fileobj(test_sigmffile, # add a second one to the same fileobj multirec_tar = create_test_archive(test_alternate_sigmffile, t) members = multirec_tar.getmembers() - assert len(members) == 6 # 2 directories and 2 files per directory + assert len(members) == 4 # 2 metadata files + 2 data files def test_tarfile_persmissions(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: + # add 'test1' to name to create 'test1' folder + test_sigmffile.name = "test1/test1" sigmf_tarfile = create_test_archive(test_sigmffile, temp) basedir, file1, file2 = sigmf_tarfile.getmembers() assert basedir.mode == 0o755 @@ -168,7 +161,7 @@ def test_tarfile_persmissions(test_sigmffile): def test_contents(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) - basedir, file1, file2 = sigmf_tarfile.getmembers() + file1, file2 = sigmf_tarfile.getmembers() if file1.name.endswith(SIGMF_METADATA_EXT): mdfile = file1 datfile = file2 @@ -234,8 +227,8 @@ def test_single_recording_with_collection(test_sigmffile): test_collection, fileobj=tmpfile) with tarfile.open(archive.path) as tar: - # 1 collection_file + 1 dir + 1 meta file + 1 data file - assert len(tar.getmembers()) == 4 + # 1 collection_file + 1 meta file + 1 data file + assert len(tar.getmembers()) == 3 for member in tar.getmembers(): if member.isfile(): if member.name.endswith(SIGMF_COLLECTION_EXT): @@ -271,8 +264,8 @@ def test_multiple_recordings_with_collection(test_sigmffile, test_collection, fileobj=tmpfile) with tarfile.open(archive.path) as tar: - # 1 collection_file + 3 dir + 3 meta file + 3 data file - assert len(tar.getmembers()) == 10 + # 1 collection_file + 3 meta file + 3 data file + assert len(tar.getmembers()) == 7 for member in tar.getmembers(): if member.isfile(): if member.name.endswith(SIGMF_COLLECTION_EXT):