Skip to content

Fix #2812: Drop HDF5 support #3138

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ tsk_strerror_internal(int err)
break;
case TSK_ERR_FILE_VERSION_TOO_OLD:
ret = "tskit file version too old. Please upgrade using the "
"'tskit upgrade' command. (TSK_ERR_FILE_VERSION_TOO_OLD)";
"'tskit upgrade' command from tskit version<0.6.2. (TSK_ERR_FILE_VERSION_TOO_OLD)";
break;
case TSK_ERR_FILE_VERSION_TOO_NEW:
ret = "tskit file version is too new for this instance. "
Expand Down
2 changes: 1 addition & 1 deletion c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ A file could not be read because it is in the wrong format
/**
The file is in tskit format, but the version is too old for the
library to read. The file should be upgraded to the latest version
using the ``tskit upgrade`` command line utility.
using the ``tskit upgrade`` command line utility from a tskit version earlier than 0.6.2.
*/
#define TSK_ERR_FILE_VERSION_TOO_OLD -101
/**
Expand Down
2 changes: 1 addition & 1 deletion docs/file-formats.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ stored as well as the top-level metadata.
### Legacy Versions

Tree sequence files written by older versions of tskit are not readable by
newer versions of tskit. For major releases of tskit, `tskit upgrade`
newer versions of tskit. In tskit releases before 0.6.2, `tskit upgrade`
will convert older tree sequence files to the latest version.


Expand Down
7 changes: 6 additions & 1 deletion python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
- Metadata.schema was returning a modified schema, this is fixed to return a copy of
the original schema instead (:user:`benjeffery`, :issue:`3129`, :pr:`3130`)

**Breaking Changes**

- Support for the legacy HDF5 file formats written by msprime<0.6 has been dropped. This
includes support for ``tskit upgrade`` (:user:`hossam26644`, :issue:`2812`, :pr:`3138`)

--------------------
[0.6.1] - 2025-03-31
--------------------
Expand Down Expand Up @@ -142,7 +147,7 @@
pass a subset of nodes, so subtrees can be visually collapsed. Additionally, an option
``pack_untracked_polytomies`` allows large polytomies involving untracked samples to
be summarised as a dotted line (:user:`hyanwong`, :issue:`3011` :pr:`3010`, :pr:`3012`)

- Added a ``title`` parameter to ``.draw_svg()`` methods (:user:`hyanwong`, :pr:`3015`)

- Add comma separation to all display numbers. (:user:`benjeffery`, :issue:`3017`, :pr:`3018`)
Expand Down
2 changes: 1 addition & 1 deletion python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ handle_library_error(int err)
const char *not_kas_format_msg
= "File not in kastore format. Either the file is corrupt or it is not a "
"tskit tree sequence file. It may be a legacy HDF file upgradable with "
"`tskit upgrade` or a compressed tree sequence file that can be decompressed "
"`tskit upgrade` from tskit version<0.6.2 or a compressed tree sequence file that can be decompressed "
"with `tszip`.";
const char *ibd_pairs_not_stored_msg
= "Sample pairs are not stored by default "
Expand Down
1 change: 0 additions & 1 deletion python/requirements/CI-complete/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
biopython==1.85
coverage==7.7.0
dendropy==5.0.1
h5py==3.13.0
kastore==0.3.3
lshmm==0.0.8
msgpack==1.1.0
Expand Down
1 change: 0 additions & 1 deletion python/requirements/CI-tests-conda/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
msprime==1.3.3
tszip==0.2.5
h5py==3.13.0
zarr<3
3 changes: 1 addition & 2 deletions python/requirements/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ codecov
coverage
dendropy
flake8
h5py>=2.6.0
jsonschema>=3.0.0
jupyter-book>=0.12.1
kastore
Expand All @@ -22,7 +21,7 @@ numpy
packaging
portion
pre-commit
pyparsing
pyparsing
pysam
pytest
pytest-cov
Expand Down
1 change: 0 additions & 1 deletion python/requirements/development.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ dependencies:
- dendropy
- doxygen
- flake8
- h5py>=2.6.0
- jsonschema>=3.0.0
- jupyter-book>=0.12.1
- kastore
Expand Down
78 changes: 1 addition & 77 deletions python/tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2018-2024 Tskit Developers
# Copyright (c) 2018-2025 Tskit Developers
# Copyright (c) 2017 University of Oxford
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down Expand Up @@ -30,7 +30,6 @@
import unittest
from unittest import mock

import h5py
import msprime
import pytest

Expand Down Expand Up @@ -311,16 +310,6 @@ def test_vcf_allow_position_zero(self, flags, expected):
assert args.tree_sequence == tree_sequence
assert args.allow_position_zero == expected

def test_upgrade_default_values(self):
    """
    Parse a plain ``upgrade`` invocation (source and destination only) and
    check the values on the resulting argument namespace.
    """
    src_path, dest_path = "in.trees", "out.trees"
    argv = ["upgrade", src_path, dest_path]
    parsed = cli.get_tskit_parser().parse_args(argv)
    assert parsed.source == src_path
    assert parsed.destination == dest_path
    # No flag was passed, so duplicate-position removal must be falsy.
    assert not parsed.remove_duplicate_positions

def test_info_default_values(self):
parser = cli.get_tskit_parser()
cmd = "info"
Expand Down Expand Up @@ -655,68 +644,3 @@ def test_migrations(self):

def test_provenances(self):
# Delegates to self.verify with the "provenances" argument.
self.verify("provenances")


class TestUpgrade(TestCli):
    """
    Tests the results of the upgrade operation to ensure they are
    correct.
    """

    def setUp(self):
        # mkstemp returns an open descriptor; close it straight away because
        # the tests only need the paths.
        fd, self.legacy_file_name = tempfile.mkstemp(prefix="msp_cli", suffix=".trees")
        os.close(fd)
        fd, self.current_file_name = tempfile.mkstemp(prefix="msp_cli", suffix=".trees")
        os.close(fd)

    def tearDown(self):
        os.unlink(self.legacy_file_name)
        os.unlink(self.current_file_name)

    def _dump_legacy_with_zeroed_positions(self, ts, version):
        """
        Write ``ts`` to ``self.legacy_file_name`` in legacy format ``version``
        and then overwrite every mutation position in that file with zero.
        """
        tskit.dump_legacy(ts, self.legacy_file_name, version=version)
        # The context manager closes the HDF5 handle even if the write raises,
        # so tearDown can always unlink the file.
        with h5py.File(self.legacy_file_name, "r+") as root:
            root["mutations/position"][:] = 0

    def test_conversion(self):
        """
        Upgrading a legacy file produces a loadable tree sequence and writes
        nothing to stdout or stderr.
        """
        ts1 = msprime.simulate(10)
        for version in [2, 3]:
            tskit.dump_legacy(ts1, self.legacy_file_name, version=version)
            stdout, stderr = capture_output(
                cli.tskit_main,
                ["upgrade", self.legacy_file_name, self.current_file_name],
            )
            ts2 = tskit.load(self.current_file_name)
            assert stdout == ""
            assert stderr == ""
            # Quick checks to ensure we have the right tree sequence.
            # More thorough checks are done elsewhere.
            assert ts1.get_sample_size() == ts2.get_sample_size()
            assert ts1.num_edges == ts2.num_edges
            assert ts1.get_num_trees() == ts2.get_num_trees()

    def test_duplicate_positions(self):
        """
        With ``-d``, a legacy file whose mutations all sit at position zero
        upgrades to a tree sequence with a single site.
        """
        ts = msprime.simulate(10, mutation_rate=10)
        for version in [2, 3]:
            self._dump_legacy_with_zeroed_positions(ts, version)
            stdout, stderr = capture_output(
                cli.tskit_main,
                ["upgrade", "-d", self.legacy_file_name, self.current_file_name],
            )
            assert stdout == ""
            tsp = tskit.load(self.current_file_name)
            assert tsp.sample_size == ts.sample_size
            assert tsp.num_sites == 1

    def test_duplicate_positions_error(self):
        """
        Without ``-d``, upgrading a file with duplicate positions calls
        ``sys.exit`` exactly once.
        """
        ts = msprime.simulate(10, mutation_rate=10)
        for version in [2, 3]:
            self._dump_legacy_with_zeroed_positions(ts, version)
            # sys.exit is patched to raise so the exit can be observed
            # without terminating the test process.
            with mock.patch("sys.exit", side_effect=TestException) as mocked_exit:
                with pytest.raises(TestException):
                    capture_output(
                        cli.tskit_main,
                        ["upgrade", self.legacy_file_name, self.current_file_name],
                    )
                assert mocked_exit.call_count == 1
Loading
Loading