diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml new file mode 100644 index 0000000000..42620cef1e --- /dev/null +++ b/.github/workflows/emscripten.yml @@ -0,0 +1,112 @@ +# Attributed to NumPy https://github.com/numpy/numpy/pull/25894 +# https://github.com/numpy/numpy/blob/d2d2c25fa81b47810f5cbd85ea6485eb3a3ffec3/.github/workflows/emscripten.yml + +name: Pyodide wheel + +on: + # TODO: refine after this is ready to merge + [push, pull_request, workflow_dispatch] + +env: + FORCE_COLOR: 3 + PYODIDE_VERSION: 0.28.0a3 + # PYTHON_VERSION and EMSCRIPTEN_VERSION are determined by PYODIDE_VERSION. + # The appropriate versions can be found in the Pyodide repodata.json + # "info" field, or in Makefile.envs: + # https://github.com/pyodide/pyodide/blob/main/Makefile.envs#L2 + PYTHON_VERSION: 3.13 # any 3.13.x version works + EMSCRIPTEN_VERSION: 4.0.9 + NODE_VERSION: 22 + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + build_wasm_emscripten: + name: Build and test Zarr for Pyodide + runs-on: ubuntu-latest + # To enable this workflow on a fork, comment out: + # FIXME: uncomment after this is ready to merge + # if: github.repository == 'zarr-developers/zarr-python' + steps: + - name: Checkout Zarr repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + + - name: Set up Python ${{ env.PYTHON_VERSION }} + id: setup-python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Set up Emscripten toolchain + uses: mymindstorm/setup-emsdk@v14 + with: + version: ${{ env.EMSCRIPTEN_VERSION }} + actions-cache-folder: emsdk-cache + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + + - name: Install pyodide-build + run: python -m pip install pyodide-build + + - name: Build Zarr for Pyodide + run: | + pyodide xbuildenv install ${{ 
env.PYODIDE_VERSION }} + pyodide build + + ### (Temporarily) build numcodecs as well, as we have an older version in the Pyodide distribution (v0.13.1) + + - name: Clone numcodecs repository + uses: actions/checkout@v4 + with: + # See https://github.com/zarr-developers/numcodecs/pull/529 + repository: agriyakhetarpal/numcodecs + ref: setup-emscripten-ci + path: numcodecs-wasm + submodules: recursive + fetch-depth: 0 + fetch-tags: true + + # For some reason fetch-depth: 0 and fetch-tags: true aren't working... + - name: Manually fetch tags for numcodecs + working-directory: numcodecs-wasm + run: git fetch --tags + + - name: Build numcodecs for WASM + run: pyodide build + working-directory: numcodecs-wasm + env: + DISABLE_NUMCODECS_AVX2: 1 + DISABLE_NUMCODECS_SSE2: 1 + + ### Back to Zarr repository to run tests + + - name: Run Zarr tests for Pyodide + run: | + # Set up Pyodide virtual environment and activate it + pyodide venv .venv-pyodide + source .venv-pyodide/bin/activate + + # Install numcodecs + pip install $(ls numcodecs-wasm/dist/*.whl)"[crc32c]" + + # Install Zarr without dependencies until we can figure out the + # numcodecs wheel versioning issue + pip install dist/*.whl --no-deps + pip install "packaging>=22.0" "numpy>=1.25" "typing_extensions>=4.9" "donfig>=0.8" + + # Install test dependencies + pip install "coverage" "pytest" "pytest-asyncio" "pytest-cov" "pytest-accept" "rich" "mypy" "hypothesis" + + python -m pytest tests -v --cov=zarr --cov-config=pyproject.toml + diff --git a/changes/1903.feature.rst b/changes/1903.feature.rst new file mode 100644 index 0000000000..f2db94f95e --- /dev/null +++ b/changes/1903.feature.rst @@ -0,0 +1,6 @@ +Added official support for the Pyodide/WebAssembly platform for using Zarr within browser-based environments. +The ``threading.max_workers`` parameter takes a default value of 1, and the ``zarr.sync`` interface is not +supported. 
At the moment, using Zarr requires the JavaScript Promise Integration (JSPI) WebAssembly feature +to be enabled in Pyodide; JSPI is still experimental and is hidden behind flags in web browsers. See the +`JavaScript Promise Integration reference <https://v8.dev/blog/jspi>`_ and +`WebAssembly feature status <https://webassembly.org/features/>`_ pages for more details. diff --git a/pyproject.toml b/pyproject.toml index 1f270b435f..cec7783986 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -410,6 +410,7 @@ filterwarnings = [ "ignore:The dtype .* is currently not part in the Zarr format 3 specification.*:UserWarning", "ignore:Use zarr.create_array instead.:DeprecationWarning", "ignore:Duplicate name.*:UserWarning", + "ignore:Error cleaning up asyncio loop.*:RuntimeWarning", # appears in Pyodide/WASM as it uses its own browser-based event loop "ignore:The `compressor` argument is deprecated. Use `compressors` instead.:UserWarning", "ignore:Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.:UserWarning", "ignore:Unclosed client session None: # numcodecs 0.13.0 introduces the checksum attribute for the zstd codec - _numcodecs_version = Version(numcodecs.__version__) - if _numcodecs_version < Version("0.13.0"): - raise RuntimeError( - "numcodecs version >= 0.13.0 is required to use the zstd codec. " - f"Version {_numcodecs_version} is currently installed." - ) + # _numcodecs_version = Version(numcodecs.__version__) + # if _numcodecs_version < Version("0.13.0"): + # raise RuntimeError( + # "numcodecs version >= 0.13.0 is required to use the zstd codec. " + # f"Version {_numcodecs_version} is currently installed." 
+ # ) level_parsed = parse_zstd_level(level) checksum_parsed = parse_checksum(checksum) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 2a10943d80..ead6b4a96e 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -33,6 +33,8 @@ from donfig import Config as DConfig +from zarr._constants import IS_WASM + if TYPE_CHECKING: from donfig.config_obj import ConfigSet @@ -107,7 +109,7 @@ def enable_gpu(self) -> ConfigSet: }, }, "async": {"concurrency": 10, "timeout": None}, - "threading": {"max_workers": None}, + "threading": {"max_workers": 1 if IS_WASM else None}, "json_indent": 2, "codec_pipeline": { "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", diff --git a/src/zarr/core/sync.py b/src/zarr/core/sync.py index d9b4839e8e..e29363d166 100644 --- a/src/zarr/core/sync.py +++ b/src/zarr/core/sync.py @@ -6,10 +6,12 @@ import os import threading from concurrent.futures import ThreadPoolExecutor, wait +from textwrap import dedent from typing import TYPE_CHECKING, Any, TypeVar from typing_extensions import ParamSpec +from zarr._constants import IS_WASM from zarr.core.config import config if TYPE_CHECKING: @@ -133,6 +135,46 @@ def sync( -------- >>> sync(async_function(), existing_loop) """ + # WASM environments (like Pyodide) cannot start new threads, so we need to handle + # coroutines differently. We integrate with the existing Pyodide WebLoop which + # schedules tasks on the browser's event loop using setTimeout(): + # https://developer.mozilla.org/en-US/docs/Web/API/setTimeout + if IS_WASM: # pragma: no cover + # This code path is covered in the Pyodide/WASM CI job. + current_loop = asyncio.get_running_loop() + result = current_loop.run_until_complete(coro) + # Check if run_until_complete actually executed the coroutine or just returned a task + # In browsers without JSPI, run_until_complete is a no-op that will return the task/future. 
+ if isinstance(result, (asyncio.Task, asyncio.Future)): + raise RuntimeError( + dedent(""" + Cannot use synchronous zarr API in browser-based environments without JSPI. + Zarr requires JavaScript Promise Integration (JSPI) to work in browsers, + but JSPI is not enabled in your environment. + + The available solutions are to either use Zarr's async API instead with + zarr.api.asynchronous, or if you want to use your existing code, follow + these steps (all required): + 1. Enable JSPI in your Pyodide setup with + `loadPyodide({ enableRunUntilComplete: true })` AND + 2. Use a JSPI-enabled website or browser configuration (for example, with + --enable-features=WebAssemblyExperimentalJSPI for Google Chrome). If you + are the owner of a website, you may sign up for an origin trial for JSPI. + + If you are using Node.js, pass the --experimental-wasm-jspi flag + (available for v20+). + + Note: JSPI is experimental and not yet standardised across all browsers. + See https://webassembly.org/features/ for more information and status, + https://v8.dev/blog/jspi#how-can-i-use-jspi-today%3F for usage, and + https://v8.dev/blog/jspi-ot for more information on origin trials. + """) + ) + return result + + # This code path is the original thread-based implementation + # for non-WASM environments; it creates a dedicated I/O thread + # with its own event loop. if loop is None: # NB: if the loop is not running *yet*, it is OK to submit work # and we will wait for it @@ -170,6 +212,13 @@ def _get_loop() -> asyncio.AbstractEventLoop: The loop will be running on a separate thread. """ + if IS_WASM: # pragma: no cover + # This case is covered in the Pyodide/WASM CI job. + raise RuntimeError( + "Thread-based event loop not available in WASM environment. " + "Use zarr.api.asynchronous or ensure sync() handles WASM case." 
+        )
+
     if loop[0] is None:
         with _get_lock():
             # repeat the check just in case the loop got filled between the
diff --git a/tests/conftest.py b/tests/conftest.py
index 948d3cd055..53115aa294 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,6 +10,7 @@
 from hypothesis import HealthCheck, Verbosity, settings
 
 from zarr import AsyncGroup, config
+from zarr._constants import IS_WASM
 from zarr.abc.store import Store
 from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation
 from zarr.core.array import (
@@ -176,15 +177,37 @@ def pytest_addoption(parser: Any) -> None:
         default=False,
         help="run slow hypothesis tests",
     )
+    parser.addoption(
+        "--run-slow-wasm",
+        action="store_true",
+        default=False,
+        help="run slow tests only applicable to WASM",
+    )
 
 
 def pytest_collection_modifyitems(config: Any, items: Any) -> None:
-    if config.getoption("--run-slow-hypothesis"):
+    """Skip opt-in slow tests unless their command-line flag was passed.
+
+    Tests marked ``slow_hypothesis`` are skipped without ``--run-slow-hypothesis``.
+    Tests marked ``slow_wasm`` are skipped only when ``IS_WASM`` is true and
+    ``--run-slow-wasm`` was not passed. The two flags are evaluated independently,
+    so passing one of them never un-skips the other category.
+    """
+    skip_hypothesis = not config.getoption("--run-slow-hypothesis")
+    skip_wasm = IS_WASM and not config.getoption("--run-slow-wasm")
+    if not (skip_hypothesis or skip_wasm):
         return
+
     skip_slow_hyp = pytest.mark.skip(reason="need --run-slow-hypothesis option to run")
+    skip_slow_wasm = pytest.mark.skip(
+        reason="need --run-slow-wasm option to run in WASM, or not running in WASM"
+    )
+
     for item in items:
-        if "slow_hypothesis" in item.keywords:
+        if skip_hypothesis and "slow_hypothesis" in item.keywords:
             item.add_marker(skip_slow_hyp)
+        if skip_wasm and "slow_wasm" in item.keywords:
+            item.add_marker(skip_slow_wasm)
 
 
 settings.register_profile(
diff --git a/tests/test_array.py b/tests/test_array.py
index 3fc7b3938c..bc6ee7e2cc 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -18,6 +18,7 @@
 import zarr.api.asynchronous
 import zarr.api.synchronous as sync_api
 from zarr import Array, AsyncArray, Group
+from zarr._constants import IS_WASM
 from zarr.abc.store import Store
 from zarr.codecs import (
     BytesCodec,
@@ -1677,6 +1678,10 @@ def _index_array(arr: Array, index: Any) -> Any:
     return arr[index]
 
 
+@pytest.mark.skipif(
+    IS_WASM,
+    reason="can't start new processes in Pyodide",
+)
 @pytest.mark.parametrize(
     "method",
     [
diff --git 
a/tests/test_codecs/test_blosc.py b/tests/test_codecs/test_blosc.py index 6e6e9df383..4140d9154f 100644 --- a/tests/test_codecs/test_blosc.py +++ b/tests/test_codecs/test_blosc.py @@ -6,6 +6,7 @@ from packaging.version import Version import zarr +from zarr._constants import IS_WASM from zarr.abc.store import Store from zarr.codecs import BloscCodec from zarr.core.buffer import default_buffer_prototype @@ -58,6 +59,7 @@ async def test_blosc_evolve(store: Store, dtype: str) -> None: assert blosc_configuration_json["shuffle"] == "shuffle" +@pytest.mark.xfail(IS_WASM, reason="Blosc size mismatch, known failure case for Pyodide/WASM") async def test_typesize() -> None: a = np.arange(1000000, dtype=np.uint64) codecs = [zarr.codecs.BytesCodec(), zarr.codecs.BloscCodec()] diff --git a/tests/test_config.py b/tests/test_config.py index 2cbf172752..6f3673557a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -10,6 +10,7 @@ import zarr import zarr.api from zarr import zeros +from zarr._constants import IS_WASM from zarr.abc.codec import CodecPipeline from zarr.abc.store import ByteSetter, Store from zarr.codecs import ( @@ -83,7 +84,7 @@ def test_config_defaults_set() -> None: }, }, "async": {"concurrency": 10, "timeout": None}, - "threading": {"max_workers": None}, + "threading": {"max_workers": 1 if IS_WASM else None}, "json_indent": 2, "codec_pipeline": { "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", diff --git a/tests/test_indexing.py b/tests/test_indexing.py index b1707c88a3..b6217c458c 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -739,6 +739,7 @@ def _test_get_orthogonal_selection_3d( _test_get_orthogonal_selection(a, z, selection) +@pytest.mark.slow_wasm def test_get_orthogonal_selection_3d(store: StorePath) -> None: # setup a = np.arange(100000, dtype=int).reshape(200, 50, 10) @@ -801,6 +802,7 @@ def _test_set_orthogonal_selection( assert_array_equal(a, z[:]) +@pytest.mark.slow_wasm def 
test_set_orthogonal_selection_1d(store: StorePath) -> None: # setup v = np.arange(1050, dtype=int) @@ -866,6 +868,7 @@ def _test_set_orthogonal_selection_2d( _test_set_orthogonal_selection(v, a, z, selection) +@pytest.mark.slow_wasm def test_set_orthogonal_selection_2d(store: StorePath) -> None: # setup v = np.arange(10000, dtype=int).reshape(1000, 10) @@ -928,6 +931,7 @@ def _test_set_orthogonal_selection_3d( _test_set_orthogonal_selection(v, a, z, selection) +@pytest.mark.slow_wasm def test_set_orthogonal_selection_3d(store: StorePath) -> None: # setup v = np.arange(100000, dtype=int).reshape(200, 50, 10) @@ -1148,6 +1152,7 @@ def test_set_coordinate_selection_1d(store: StorePath) -> None: z.vindex[selection] = 42 # type:ignore[index] +@pytest.mark.slow_wasm def test_set_coordinate_selection_2d(store: StorePath) -> None: # setup v = np.arange(10000, dtype=int).reshape(1000, 10) @@ -1331,6 +1336,7 @@ def _test_set_block_selection( assert_array_equal(a, z[:]) +@pytest.mark.slow_wasm def test_set_block_selection_1d(store: StorePath) -> None: # setup v = np.arange(1050, dtype=int) @@ -1349,6 +1355,7 @@ def test_set_block_selection_1d(store: StorePath) -> None: z.blocks[selection_bad] = 42 # type:ignore[index] +@pytest.mark.slow_wasm def test_set_block_selection_2d(store: StorePath) -> None: # setup v = np.arange(10000, dtype=int).reshape(1000, 10) diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index 9bf1c4e544..f71a946300 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -618,6 +618,7 @@ async def test_use_consolidated_for_children_members( expected = ["b", "b/c"] assert result == expected + @pytest.mark.parametrize("fill_value", 
[np.nan, np.inf, -np.inf]) async def test_consolidated_metadata_encodes_special_chars( memory_store: Store, zarr_format: ZarrFormat, fill_value: float diff --git a/tests/test_properties.py b/tests/test_properties.py index d48dfe2fef..da26edf398 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -7,6 +7,7 @@ import pytest from numpy.testing import assert_array_equal +from zarr._constants import IS_WASM from zarr.core.buffer import default_buffer_prototype pytest.importorskip("hypothesis") @@ -76,6 +77,7 @@ def deep_equal(a: Any, b: Any) -> bool: return a == b +@pytest.mark.slow_wasm @given(data=st.data(), zarr_format=zarr_formats) def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None: nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format))) @@ -83,6 +85,7 @@ def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None: assert_array_equal(nparray, zarray[:]) +@pytest.mark.slow_wasm @given(array=arrays()) def test_array_creates_implicit_groups(array): path = array.path @@ -102,6 +105,7 @@ def test_array_creates_implicit_groups(array): # this decorator removes timeout; not ideal but it should avoid intermittent CI failures +@pytest.mark.skipif(IS_WASM, reason="Unreliable test on Pyodide/WASM due to Hypothesis") @settings(deadline=None) @given(data=st.data()) def test_basic_indexing(data: st.DataObject) -> None: @@ -117,6 +121,7 @@ def test_basic_indexing(data: st.DataObject) -> None: assert_array_equal(nparray, zarray[:]) +@pytest.mark.skipif(IS_WASM, reason="Unreliable test on Pyodide/WASM due to Hypothesis") @given(data=st.data()) def test_oindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. 
@@ -138,6 +143,7 @@ def test_oindex(data: st.DataObject) -> None: assert_array_equal(nparray, zarray[:]) +@pytest.mark.skipif(IS_WASM, reason="Unreliable test on Pyodide/WASM due to Hypothesis") @given(data=st.data()) def test_vindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. @@ -161,6 +167,7 @@ def test_vindex(data: st.DataObject) -> None: # assert_array_equal(nparray, zarray[:]) +@pytest.mark.slow_wasm @given(store=stores, meta=array_metadata()) # type: ignore[misc] async def test_roundtrip_array_metadata_from_store( store: Store, meta: ArrayV2Metadata | ArrayV3Metadata diff --git a/tests/test_sync.py b/tests/test_sync.py index 13b475f8da..84824839b5 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -5,6 +5,7 @@ import pytest import zarr +from zarr._constants import IS_WASM from zarr.core.sync import ( SyncError, SyncMixin, @@ -17,6 +18,11 @@ ) from zarr.storage import MemoryStore +pytestmark = pytest.mark.skipif( + IS_WASM, + reason="can't start new threads in Pyodide/WASM, so the synchronous API doesn't work", +) + @pytest.fixture(params=[True, False]) def sync_loop(request: pytest.FixtureRequest) -> asyncio.AbstractEventLoop | None: