diff --git a/docs/api/list.md b/docs/api/list.md index 64bafa8a..11097de9 100644 --- a/docs/api/list.md +++ b/docs/api/list.md @@ -3,6 +3,3 @@ ::: obstore.list ::: obstore.list_with_delimiter ::: obstore.list_with_delimiter_async -::: obstore.ListResult -::: obstore.ListStream -::: obstore.ListChunkType diff --git a/docs/api/store/aws.md b/docs/api/store/aws.md index 8287a1b1..b46b10d7 100644 --- a/docs/api/store/aws.md +++ b/docs/api/store/aws.md @@ -1,6 +1,9 @@ # AWS S3 ::: obstore.store.S3Store + options: + inherited_members: true + show_bases: false ::: obstore.store.S3Config options: show_if_no_docstring: true diff --git a/docs/api/store/azure.md b/docs/api/store/azure.md index 56ba55aa..9e8e2f0e 100644 --- a/docs/api/store/azure.md +++ b/docs/api/store/azure.md @@ -1,6 +1,9 @@ # Microsoft Azure ::: obstore.store.AzureStore + options: + inherited_members: true + show_bases: false ::: obstore.store.AzureConfig options: show_if_no_docstring: true diff --git a/docs/api/store/gcs.md b/docs/api/store/gcs.md index 7b7264ed..83803673 100644 --- a/docs/api/store/gcs.md +++ b/docs/api/store/gcs.md @@ -1,6 +1,9 @@ # Google Cloud Storage ::: obstore.store.GCSStore + options: + inherited_members: true + show_bases: false ::: obstore.store.GCSConfig options: show_if_no_docstring: true diff --git a/docs/api/store/http.md b/docs/api/store/http.md index 53e1001c..43206980 100644 --- a/docs/api/store/http.md +++ b/docs/api/store/http.md @@ -1,3 +1,6 @@ # HTTP ::: obstore.store.HTTPStore + options: + inherited_members: true + show_bases: false diff --git a/docs/api/store/local.md b/docs/api/store/local.md index ceb0cdab..63d34fcf 100644 --- a/docs/api/store/local.md +++ b/docs/api/store/local.md @@ -1,3 +1,6 @@ # Local ::: obstore.store.LocalStore + options: + inherited_members: true + show_bases: false diff --git a/docs/api/store/memory.md b/docs/api/store/memory.md index ced4fd86..b0a4701c 100644 --- a/docs/api/store/memory.md +++ b/docs/api/store/memory.md @@ -1,3 +1,6 @@ # 
Memory ::: obstore.store.MemoryStore + options: + inherited_members: true + show_bases: false diff --git a/docs/cookbook.md b/docs/cookbook.md index 17369ea4..48be6903 100644 --- a/docs/cookbook.md +++ b/docs/cookbook.md @@ -39,6 +39,19 @@ store = ... # store of your choice # Get a stream of Arrow RecordBatches of metadata list_stream = obs.list(store, prefix="data", return_arrow=True) for record_batch in list_stream: + # Perform zero-copy conversion to your arrow-backed library of choice + # + # To pyarrow: + # pyarrow.record_batch(record_batch) + # + # To polars: + # polars.DataFrame(record_batch) + # + # To pandas (with Arrow-backed data-types): + # pyarrow.record_batch(record_batch).to_pandas(types_mapper=pd.ArrowDtype) + # + # To arro3: + # arro3.core.RecordBatch(record_batch) print(record_batch.num_rows) ``` diff --git a/obstore/python/obstore/__init__.py b/obstore/python/obstore/__init__.py index be9a26a1..96e8321b 100644 --- a/obstore/python/obstore/__init__.py +++ b/obstore/python/obstore/__init__.py @@ -1,9 +1,50 @@ from typing import TYPE_CHECKING -from ._obstore import * -from ._obstore import ___version +from . import store +from ._obstore import ( + Bytes, + ___version, + copy, + copy_async, + delete, + delete_async, + get, + get_async, + get_range, + get_range_async, + get_ranges, + get_ranges_async, + head, + head_async, + list, # noqa: A004 + list_with_delimiter, + list_with_delimiter_async, + open_reader, + open_reader_async, + open_writer, + open_writer_async, + put, + put_async, + rename, + rename_async, + sign, + sign_async, +) if TYPE_CHECKING: - from . import exceptions, store - + from . 
import _store, exceptions + from ._obstore import ( + HTTP_METHOD, + AsyncReadableFile, + AsyncWritableFile, + Bytes, + BytesStream, + GetResult, + ListChunkType, + ListResult, + ListStream, + ReadableFile, + SignCapableStore, + WritableFile, + ) __version__: str = ___version() diff --git a/obstore/python/obstore/_buffered.pyi b/obstore/python/obstore/_buffered.pyi index 395f185a..1664e373 100644 --- a/obstore/python/obstore/_buffered.pyi +++ b/obstore/python/obstore/_buffered.pyi @@ -7,7 +7,7 @@ from obspec._attributes import Attributes from ._bytes import Bytes from ._list import ObjectMeta -from .store import ObjectStore +from ._store import ObjectStore if sys.version_info >= (3, 12): from collections.abc import Buffer diff --git a/obstore/python/obstore/_copy.pyi b/obstore/python/obstore/_copy.pyi index 78abe4ce..059efc90 100644 --- a/obstore/python/obstore/_copy.pyi +++ b/obstore/python/obstore/_copy.pyi @@ -1,4 +1,4 @@ -from .store import ObjectStore +from ._store import ObjectStore def copy(store: ObjectStore, from_: str, to: str, *, overwrite: bool = True) -> None: """Copy an object from one path to another in the same object store. diff --git a/obstore/python/obstore/_delete.pyi b/obstore/python/obstore/_delete.pyi index fca7e862..10d66c47 100644 --- a/obstore/python/obstore/_delete.pyi +++ b/obstore/python/obstore/_delete.pyi @@ -1,6 +1,6 @@ from collections.abc import Sequence -from .store import ObjectStore +from ._store import ObjectStore def delete(store: ObjectStore, paths: str | Sequence[str]) -> None: """Delete the object at the specified location(s). diff --git a/obstore/python/obstore/_get.pyi b/obstore/python/obstore/_get.pyi index 033e90ae..9ea75cd5 100644 --- a/obstore/python/obstore/_get.pyi +++ b/obstore/python/obstore/_get.pyi @@ -6,7 +6,7 @@ from obspec._get import GetOptions from ._bytes import Bytes from ._list import ObjectMeta -from .store import ObjectStore +from ._store import ObjectStore class GetResult: """Result for a get request. 
diff --git a/obstore/python/obstore/_list.pyi b/obstore/python/obstore/_list.pyi index cd0ce9b8..11290a6d 100644 --- a/obstore/python/obstore/_list.pyi +++ b/obstore/python/obstore/_list.pyi @@ -1,77 +1,12 @@ -# ruff: noqa: UP006 -# ruff: noqa: UP035 -# Use `list` instead of `List` for type annotation -# `typing.List` is deprecated, use `list` instead -# ruff: noqa: A001 -# Variable `list` is shadowing a Python builtinRuff +# ruff: noqa: A001, UP006, UP035 -from typing import Generic, List, Literal, Self, TypedDict, TypeVar, overload +from typing import List, Literal, overload from arro3.core import RecordBatch, Table +from obspec._list import ListResult, ListStream from obspec._meta import ObjectMeta -from .store import ObjectStore - -ListChunkType = TypeVar("ListChunkType", List[ObjectMeta], RecordBatch, Table) # noqa: PYI001 -"""The data structure used for holding list results. - -By default, listing APIs return a `list` of [`ObjectMeta`][obspec.ObjectMeta]. However -for improved performance when listing large buckets, you can pass `return_arrow=True`. -Then an Arrow `RecordBatch` will be returned instead. - -This implements [`obspec.ListChunkType_co`][], but is redefined here to specialize the -exact instance of the Arrow return type, given that in the obstore implementation, an -[`arro3.core.RecordBatch`][] or [`arro3.core.Table`][] will always be returned. -""" - -class ListResult(TypedDict, Generic[ListChunkType]): - """Result of a list call. - - Includes objects, prefixes (directories) and a token for the next set of results. - Individual result sets may be limited to 1,000 objects based on the underlying - object storage's limitations. - - This implements [`obspec.ListResult`][]. 
- """ - - common_prefixes: List[str] - """Prefixes that are common (like directories)""" - - objects: ListChunkType - """Object metadata for the listing""" - -class ListStream(Generic[ListChunkType]): - """A stream of [ObjectMeta][obspec.ObjectMeta] that can be polled in a sync or - async fashion. - - This implements [`obspec.ListStream`][]. - """ # noqa: D205 - - def __aiter__(self) -> Self: - """Return `Self` as an async iterator.""" - - def __iter__(self) -> Self: - """Return `Self` as an async iterator.""" - - async def collect_async(self) -> ListChunkType: - """Collect all remaining ObjectMeta objects in the stream. - - This ignores the `chunk_size` parameter from the `list` call and collects all - remaining data into a single chunk. - """ - - def collect(self) -> ListChunkType: - """Collect all remaining ObjectMeta objects in the stream. - - This ignores the `chunk_size` parameter from the `list` call and collects all - remaining data into a single chunk. - """ - - async def __anext__(self) -> ListChunkType: - """Return the next chunk of ObjectMeta in the stream.""" - - def __next__(self) -> ListChunkType: - """Return the next chunk of ObjectMeta in the stream.""" +from ._store import ObjectStore @overload def list( @@ -163,7 +98,7 @@ def list( !!! note There is no async version of this method, because `list` is not async under the hood, rather it only instantiates a stream, which can be polled in synchronous - or asynchronous fashion. See [`ListStream`][obstore.ListStream]. + or asynchronous fashion. See [`ListStream`][obspec.ListStream]. Args: store: The ObjectStore instance to use. @@ -174,8 +109,8 @@ def list( chunk_size: The number of items to collect per chunk in the returned (async) iterator. All chunks except for the last one will have this many items. 
This is ignored in the - [`collect`][obstore.ListStream.collect] and - [`collect_async`][obstore.ListStream.collect_async] methods of + [`collect`][obspec.ListStream.collect] and + [`collect_async`][obspec.ListStream.collect_async] methods of `ListStream`. return_arrow: If `True`, return each batch of list items as an Arrow `RecordBatch`, not as a list of Python `dict`s. Arrow removes serialization diff --git a/obstore/python/obstore/_obstore.pyi b/obstore/python/obstore/_obstore.pyi index 71f16f6f..cd90f807 100644 --- a/obstore/python/obstore/_obstore.pyi +++ b/obstore/python/obstore/_obstore.pyi @@ -1,3 +1,4 @@ +from . import _store as _store from ._buffered import AsyncReadableFile as AsyncReadableFile from ._buffered import AsyncWritableFile as AsyncWritableFile from ._buffered import ReadableFile as ReadableFile @@ -21,9 +22,6 @@ from ._get import get_ranges as get_ranges from ._get import get_ranges_async as get_ranges_async from ._head import head as head from ._head import head_async as head_async -from ._list import ListChunkType as ListChunkType -from ._list import ListResult as ListResult -from ._list import ListStream as ListStream from ._list import list as list # noqa: A004 from ._list import list_with_delimiter as list_with_delimiter from ._list import list_with_delimiter_async as list_with_delimiter_async @@ -31,6 +29,7 @@ from ._put import put as put from ._put import put_async as put_async from ._rename import rename as rename from ._rename import rename_async as rename_async +from ._scheme import parse_scheme as parse_scheme from ._sign import HTTP_METHOD as HTTP_METHOD from ._sign import SignCapableStore as SignCapableStore from ._sign import sign as sign diff --git a/obstore/python/obstore/_rename.pyi b/obstore/python/obstore/_rename.pyi index 27f815b5..38e358f1 100644 --- a/obstore/python/obstore/_rename.pyi +++ b/obstore/python/obstore/_rename.pyi @@ -1,4 +1,4 @@ -from .store import ObjectStore +from ._store import ObjectStore def 
rename(store: ObjectStore, from_: str, to: str, *, overwrite: bool = True) -> None: """Move an object from one path to another in the same object store. diff --git a/obstore/python/obstore/_scheme.pyi b/obstore/python/obstore/_scheme.pyi new file mode 100644 index 00000000..83f74c7a --- /dev/null +++ b/obstore/python/obstore/_scheme.pyi @@ -0,0 +1,5 @@ +from typing import Literal + +def parse_scheme( + url: str, +) -> Literal["s3", "gcs", "http", "local", "memory", "azure"]: ... diff --git a/obstore/python/obstore/store/__init__.pyi b/obstore/python/obstore/_store/__init__.pyi similarity index 98% rename from obstore/python/obstore/store/__init__.pyi rename to obstore/python/obstore/_store/__init__.pyi index 002485cf..93cb188a 100644 --- a/obstore/python/obstore/store/__init__.pyi +++ b/obstore/python/obstore/_store/__init__.pyi @@ -1,7 +1,7 @@ # TODO: move to reusable types package from collections.abc import Callable from pathlib import Path -from typing import Any, TypeAlias, Unpack, overload +from typing import Any, Self, TypeAlias, Unpack, overload from ._aws import S3Config as S3Config from ._aws import S3Credential as S3Credential @@ -154,7 +154,7 @@ class LocalStore: *, automatic_cleanup: bool = False, mkdir: bool = False, - ) -> LocalStore: + ) -> Self: """Construct a new LocalStore from a `file://` URL. 
**Examples:** diff --git a/obstore/python/obstore/store/_aws.pyi b/obstore/python/obstore/_store/_aws.pyi similarity index 99% rename from obstore/python/obstore/store/_aws.pyi rename to obstore/python/obstore/_store/_aws.pyi index 0e6cc85f..e2062fc4 100644 --- a/obstore/python/obstore/store/_aws.pyi +++ b/obstore/python/obstore/_store/_aws.pyi @@ -1,6 +1,15 @@ from collections.abc import Coroutine from datetime import datetime -from typing import Any, Literal, NotRequired, Protocol, TypeAlias, TypedDict, Unpack +from typing import ( + Any, + Literal, + NotRequired, + Protocol, + Self, + TypeAlias, + TypedDict, + Unpack, +) from ._client import ClientConfig from ._retry import RetryConfig @@ -501,7 +510,7 @@ class S3Store: retry_config: RetryConfig | None = None, credential_provider: S3CredentialProvider | None = None, **kwargs: Unpack[S3Config], - ) -> S3Store: + ) -> Self: """Parse available connection info from a well-known storage URL. The supported url schemes are: diff --git a/obstore/python/obstore/store/_azure.pyi b/obstore/python/obstore/_store/_azure.pyi similarity index 99% rename from obstore/python/obstore/store/_azure.pyi rename to obstore/python/obstore/_store/_azure.pyi index bf446677..11fe9bba 100644 --- a/obstore/python/obstore/store/_azure.pyi +++ b/obstore/python/obstore/_store/_azure.pyi @@ -1,6 +1,6 @@ from collections.abc import Coroutine from datetime import datetime -from typing import Any, Protocol, TypeAlias, TypedDict, Unpack +from typing import Any, Protocol, Self, TypeAlias, TypedDict, Unpack from ._client import ClientConfig from ._retry import RetryConfig @@ -353,7 +353,7 @@ class AzureStore: retry_config: RetryConfig | None = None, credential_provider: AzureCredentialProvider | None = None, **kwargs: Unpack[AzureConfig], - ) -> AzureStore: + ) -> Self: """Construct a new AzureStore with values populated from a well-known storage URL. 
The supported url schemes are: diff --git a/obstore/python/obstore/store/_client.pyi b/obstore/python/obstore/_store/_client.pyi similarity index 100% rename from obstore/python/obstore/store/_client.pyi rename to obstore/python/obstore/_store/_client.pyi diff --git a/obstore/python/obstore/store/_gcs.pyi b/obstore/python/obstore/_store/_gcs.pyi similarity index 98% rename from obstore/python/obstore/store/_gcs.pyi rename to obstore/python/obstore/_store/_gcs.pyi index 63712154..ead1f00a 100644 --- a/obstore/python/obstore/store/_gcs.pyi +++ b/obstore/python/obstore/_store/_gcs.pyi @@ -1,6 +1,6 @@ from collections.abc import Coroutine from datetime import datetime -from typing import Any, Protocol, TypedDict, Unpack +from typing import Any, Protocol, Self, TypedDict, Unpack from ._client import ClientConfig from ._retry import RetryConfig @@ -164,7 +164,7 @@ class GCSStore: retry_config: RetryConfig | None = None, credential_provider: GCSCredentialProvider | None = None, **kwargs: Unpack[GCSConfig], - ) -> GCSStore: + ) -> Self: """Construct a new GCSStore with values populated from a well-known storage URL. The supported url schemes are: diff --git a/obstore/python/obstore/store/_http.pyi b/obstore/python/obstore/_store/_http.pyi similarity index 74% rename from obstore/python/obstore/store/_http.pyi rename to obstore/python/obstore/_store/_http.pyi index 80cf8459..1b45946a 100644 --- a/obstore/python/obstore/store/_http.pyi +++ b/obstore/python/obstore/_store/_http.pyi @@ -1,25 +1,10 @@ +from typing import Self + from ._client import ClientConfig from ._retry import RetryConfig class HTTPStore: - """Configure a connection to a generic HTTP server. 
- - **Example** - - Accessing the number of stars for a repo: - - ```py - import json - - import obstore as obs - from obstore.store import HTTPStore - - store = HTTPStore.from_url("https://api.github.com") - resp = obs.get(store, "repos/developmentseed/obstore") - data = json.loads(resp.bytes()) - print(data["stargazers_count"]) - ``` - """ + """Configure a connection to a generic HTTP server.""" def __init__( self, @@ -49,7 +34,7 @@ class HTTPStore: *, client_options: ClientConfig | None = None, retry_config: RetryConfig | None = None, - ) -> HTTPStore: + ) -> Self: """Construct a new HTTPStore from a URL. This is an alias of [`HTTPStore.__init__`][obstore.store.HTTPStore.__init__]. diff --git a/obstore/python/obstore/store/_retry.pyi b/obstore/python/obstore/_store/_retry.pyi similarity index 100% rename from obstore/python/obstore/store/_retry.pyi rename to obstore/python/obstore/_store/_retry.pyi diff --git a/obstore/python/obstore/store.py b/obstore/python/obstore/store.py new file mode 100644 index 00000000..c96795c8 --- /dev/null +++ b/obstore/python/obstore/store.py @@ -0,0 +1,734 @@ +"""Interface for constructing cloud storage classes.""" + +# ruff: noqa: F401 + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal, TypeAlias, Unpack, overload + +import obstore as obs +from obstore._obstore import _store +from obstore._obstore import parse_scheme as _parse_scheme +from obstore.exceptions import BaseError + +if TYPE_CHECKING: + import sys + from collections.abc import ( + AsyncIterable, + AsyncIterator, + Callable, + Iterable, + Iterator, + Sequence, + ) + from pathlib import Path + from typing import IO, Literal + + from arro3.core import RecordBatch, Table + from obspec._attributes import Attributes + from obspec._get import GetOptions + from obspec._list import ListResult, ListStream + from obspec._meta import ObjectMeta + from obspec._put import PutMode, PutResult + + from obstore._obstore import Bytes, GetResult + from 
obstore._store import ( + AzureAccessKey, + AzureBearerToken, + AzureConfig, + AzureCredential, + AzureCredentialProvider, + AzureSASToken, + BackoffConfig, + ClientConfig, + GCSConfig, + GCSCredential, + GCSCredentialProvider, + RetryConfig, + S3Config, + S3Credential, + S3CredentialProvider, + ) + + if sys.version_info >= (3, 12): + from collections.abc import Buffer + else: + from typing_extensions import Buffer + + +__all__ = [ + "AzureStore", + "GCSStore", + "HTTPStore", + "LocalStore", + "MemoryStore", + "S3Store", + "from_url", +] + + +class _ObjectStoreMixin: + def copy(self, from_: str, to: str, *, overwrite: bool = True) -> None: + """Copy an object from one path to another in the same object store. + + Refer to the documentation for [copy][obstore.copy]. + """ + return obs.copy( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + from_, + to, + overwrite=overwrite, + ) + + async def copy_async( + self, + from_: str, + to: str, + *, + overwrite: bool = True, + ) -> None: + """Call `copy` asynchronously. + + Refer to the documentation for [copy][obstore.copy]. + """ + return await obs.copy_async( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + from_, + to, + overwrite=overwrite, + ) + + def delete(self, paths: str | Sequence[str]) -> None: + """Delete the object at the specified location(s). + + Refer to the documentation for [delete][obstore.delete]. + """ + return obs.delete( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + paths, + ) + + async def delete_async(self, paths: str | Sequence[str]) -> None: + """Call `delete` asynchronously. + + Refer to the documentation for [delete][obstore.delete]. 
+ """ + return await obs.delete_async( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + paths, + ) + + def get( + self, + path: str, + *, + options: GetOptions | None = None, + ) -> GetResult: + """Return the bytes that are stored at the specified location. + + Refer to the documentation for [get][obstore.get]. + """ + return obs.get( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + path, + options=options, + ) + + async def get_async( + self, + path: str, + *, + options: GetOptions | None = None, + ) -> GetResult: + """Call `get` asynchronously. + + Refer to the documentation for [get][obstore.get]. + """ + return await obs.get_async( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + path, + options=options, + ) + + def get_range( + self, + path: str, + *, + start: int, + end: int | None = None, + length: int | None = None, + ) -> Bytes: + """Return the bytes stored at the specified location in the given byte range. + + Refer to the documentation for [get_range][obstore.get_range]. + """ + return obs.get_range( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + path, + start=start, + end=end, + length=length, + ) + + async def get_range_async( + self, + path: str, + *, + start: int, + end: int | None = None, + length: int | None = None, + ) -> Bytes: + """Call `get_range` asynchronously. + + Refer to the documentation for [get_range][obstore.get_range]. 
+ """ + return await obs.get_range_async( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + path, + start=start, + end=end, + length=length, + ) + + def get_ranges( + self, + path: str, + *, + starts: Sequence[int], + ends: Sequence[int] | None = None, + lengths: Sequence[int] | None = None, + ) -> list[Bytes]: + """Return the bytes stored at the specified location in the given byte ranges. + + Refer to the documentation for [get_ranges][obstore.get_ranges]. + """ + return obs.get_ranges( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + path, + starts=starts, + ends=ends, + lengths=lengths, + ) + + async def get_ranges_async( + self, + path: str, + *, + starts: Sequence[int], + ends: Sequence[int] | None = None, + lengths: Sequence[int] | None = None, + ) -> list[Bytes]: + """Call `get_ranges` asynchronously. + + Refer to the documentation for [get_ranges][obstore.get_ranges]. + """ + return await obs.get_ranges_async( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + path, + starts=starts, + ends=ends, + lengths=lengths, + ) + + def head(self, path: str) -> ObjectMeta: + """Return the metadata for the specified location. + + Refer to the documentation for [head][obstore.head]. + """ + return obs.head( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + path, + ) + + async def head_async(self, path: str) -> ObjectMeta: + """Call `head` asynchronously. + + Refer to the documentation for [head_async][obstore.head_async]. 
+ """ + return await obs.head_async( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + path, + ) + + @overload + def list( + self, + prefix: str | None = None, + *, + offset: str | None = None, + chunk_size: int = 50, + return_arrow: Literal[True], + ) -> ListStream[RecordBatch]: ... + @overload + def list( + self, + prefix: str | None = None, + *, + offset: str | None = None, + chunk_size: int = 50, + return_arrow: Literal[False] = False, + ) -> ListStream[list[ObjectMeta]]: ... + def list( + self, + prefix: str | None = None, + *, + offset: str | None = None, + chunk_size: int = 50, + return_arrow: bool = False, + ) -> ListStream[RecordBatch] | ListStream[list[ObjectMeta]]: + """List all the objects with the given prefix. + + Refer to the documentation for [list][obstore.list]. + """ + # Splitting these fixes the typing issue with the `return_arrow` parameter, by + # converting from a bool to a Literal[True] or Literal[False] + if return_arrow: + return obs.list( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + prefix, + offset=offset, + chunk_size=chunk_size, + return_arrow=return_arrow, + ) + + return obs.list( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + prefix, + offset=offset, + chunk_size=chunk_size, + return_arrow=return_arrow, + ) + + @overload + def list_with_delimiter( + self, + prefix: str | None = None, + *, + return_arrow: Literal[True], + ) -> ListResult[Table]: ... + @overload + def list_with_delimiter( + self, + prefix: str | None = None, + *, + return_arrow: Literal[False] = False, + ) -> ListResult[list[ObjectMeta]]: ... + def list_with_delimiter( + self, + prefix: str | None = None, + *, + return_arrow: bool = False, + ) -> ListResult[Table] | ListResult[list[ObjectMeta]]: + """List objects with the given prefix and an implementation specific + delimiter. 
+ + Refer to the documentation for + [list_with_delimiter][obstore.list_with_delimiter]. + """ # noqa: D205 + # Splitting these fixes the typing issue with the `return_arrow` parameter, by + # converting from a bool to a Literal[True] or Literal[False] + if return_arrow: + return obs.list_with_delimiter( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + prefix, + return_arrow=return_arrow, + ) + + return obs.list_with_delimiter( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + prefix, + return_arrow=return_arrow, + ) + + @overload + async def list_with_delimiter_async( + self, + prefix: str | None = None, + *, + return_arrow: Literal[True], + ) -> ListResult[Table]: ... + @overload + async def list_with_delimiter_async( + self, + prefix: str | None = None, + *, + return_arrow: Literal[False] = False, + ) -> ListResult[list[ObjectMeta]]: ... + async def list_with_delimiter_async( + self, + prefix: str | None = None, + *, + return_arrow: bool = False, + ) -> ListResult[Table] | ListResult[list[ObjectMeta]]: + """Call `list_with_delimiter` asynchronously. + + Refer to the documentation for + [list_with_delimiter][obstore.list_with_delimiter]. 
+ """ + # Splitting these fixes the typing issue with the `return_arrow` parameter, by + # converting from a bool to a Literal[True] or Literal[False] + if return_arrow: + return await obs.list_with_delimiter_async( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + prefix, + return_arrow=return_arrow, + ) + + return await obs.list_with_delimiter_async( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + prefix, + return_arrow=return_arrow, + ) + + def put( # noqa: PLR0913 + self, + path: str, + file: IO[bytes] | Path | bytes | Buffer | Iterator[Buffer] | Iterable[Buffer], + *, + attributes: Attributes | None = None, + tags: dict[str, str] | None = None, + mode: PutMode | None = None, + use_multipart: bool | None = None, + chunk_size: int = 5 * 1024 * 1024, + max_concurrency: int = 12, + ) -> PutResult: + """Save the provided bytes to the specified location. + + Refer to the documentation for [put][obstore.put]. + """ + return obs.put( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + path, + file, + attributes=attributes, + tags=tags, + mode=mode, + use_multipart=use_multipart, + chunk_size=chunk_size, + max_concurrency=max_concurrency, + ) + + async def put_async( # noqa: PLR0913 + self, + path: str, + file: IO[bytes] + | Path + | bytes + | Buffer + | AsyncIterator[Buffer] + | AsyncIterable[Buffer] + | Iterator[Buffer] + | Iterable[Buffer], + *, + attributes: Attributes | None = None, + tags: dict[str, str] | None = None, + mode: PutMode | None = None, + use_multipart: bool | None = None, + chunk_size: int = 5 * 1024 * 1024, + max_concurrency: int = 12, + ) -> PutResult: + """Call `put` asynchronously. + + Refer to the documentation for [`put`][obstore.put]. 
In addition to what the + synchronous `put` allows for the `file` parameter, this **also supports an async + iterator or iterable** of objects implementing the Python buffer protocol. + + This means, for example, you can pass the result of `get_async` directly to + `put_async`, and the request will be streamed through Python during the put + operation: + + ```py + import obstore as obs + + # This only constructs the stream, it doesn't materialize the data in memory + resp = await obs.get_async(store1, path1) + # A streaming upload is created to copy the file to path2 + await obs.put_async(store2, path2) + ``` + """ + return await obs.put_async( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + path, + file, + attributes=attributes, + tags=tags, + mode=mode, + use_multipart=use_multipart, + chunk_size=chunk_size, + max_concurrency=max_concurrency, + ) + + def rename(self, from_: str, to: str, *, overwrite: bool = True) -> None: + """Move an object from one path to another in the same object store. + + Refer to the documentation for [rename][obstore.rename]. + """ + return obs.rename( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + from_, + to, + overwrite=overwrite, + ) + + async def rename_async( + self, + from_: str, + to: str, + *, + overwrite: bool = True, + ) -> None: + """Call `rename` asynchronously. + + Refer to the documentation for [rename][obstore.rename]. + """ + return await obs.rename_async( + self, # type: ignore (Argument of type "Self@_ObjectStoreMixin" cannot be assigned to parameter "store") + from_, + to, + overwrite=overwrite, + ) + + +class AzureStore(_ObjectStoreMixin, _store.AzureStore): + """Interface to a Microsoft Azure Blob Storage container. + + All constructors will check for environment variables. Refer to + [`AzureConfig`][obstore.store.AzureConfig] for valid environment variables. 
+ """ + + +class GCSStore(_ObjectStoreMixin, _store.GCSStore): + """Interface to Google Cloud Storage. + + All constructors will check for environment variables. Refer to + [`GCSConfig`][obstore.store.GCSConfig] for valid environment variables. + + If no credentials are explicitly provided, they will be sourced from the environment + as documented + [here](https://cloud.google.com/docs/authentication/application-default-credentials). + """ + + +class HTTPStore(_ObjectStoreMixin, _store.HTTPStore): + """Configure a connection to a generic HTTP server. + + **Example** + + Accessing the number of stars for a repo: + + ```py + import json + + import obstore as obs + from obstore.store import HTTPStore + + store = HTTPStore.from_url("https://api.github.com") + resp = obs.get(store, "repos/developmentseed/obstore") + data = json.loads(resp.bytes()) + print(data["stargazers_count"]) + ``` + """ + + +class LocalStore(_ObjectStoreMixin, _store.LocalStore): + """An ObjectStore interface to local filesystem storage. + + Can optionally be created with a directory prefix. + + ```py + from pathlib import Path + + store = LocalStore() + store = LocalStore(prefix="/path/to/directory") + store = LocalStore(prefix=Path(".")) + ``` + """ + + +class MemoryStore(_ObjectStoreMixin, _store.MemoryStore): + """A fully in-memory implementation of ObjectStore. + + Create a new in-memory store: + ```py + store = MemoryStore() + ``` + """ + + +class S3Store(_ObjectStoreMixin, _store.S3Store): + """Interface to an Amazon S3 bucket. + + All constructors will check for environment variables. Refer to + [`S3Config`][obstore.store.S3Config] for valid environment variables. + + **Examples**: + + **Using requester-pays buckets**: + + Pass `request_payer=True` as a keyword argument or have `AWS_REQUESTER_PAYS=True` + set in the environment. + + **Anonymous requests**: + + Pass `skip_signature=True` as a keyword argument or have `AWS_SKIP_SIGNATURE=True` + set in the environment. 
+ """ + + +ObjectStore: TypeAlias = ( + AzureStore | GCSStore | HTTPStore | S3Store | LocalStore | MemoryStore +) +"""All supported ObjectStore implementations.""" + + +# Note: we define `from_url` again so that we can instantiate the **subclasses**. +@overload +def from_url( + url: str, + *, + config: S3Config | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: S3CredentialProvider | None = None, + **kwargs: Unpack[S3Config], +) -> ObjectStore: ... +@overload +def from_url( + url: str, + *, + config: GCSConfig | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: GCSCredentialProvider | None = None, + **kwargs: Unpack[GCSConfig], +) -> ObjectStore: ... +@overload +def from_url( + url: str, + *, + config: AzureConfig | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: AzureCredentialProvider | None = None, + **kwargs: Unpack[AzureConfig], +) -> ObjectStore: ... +@overload +def from_url( # type: ignore (parameter overlap) + url: str, + *, + config: None = None, + client_options: None = None, + retry_config: None = None, + automatic_cleanup: bool = False, + mkdir: bool = False, +) -> ObjectStore: ... +def from_url( # noqa: C901 + url: str, + *, + config: S3Config | GCSConfig | AzureConfig | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: Callable | None = None, + **kwargs: Any, +) -> ObjectStore: + """Easy construction of store by URL, identifying the relevant store. + + This will defer to a store-specific `from_url` constructor based on the provided + `url`. E.g. passing `"s3://bucket/path"` will defer to + [`S3Store.from_url`][obstore.store.S3Store.from_url]. 
+ + Supported formats: + + - `file:///path/to/my/file` -> [`LocalStore`][obstore.store.LocalStore] + - `memory:///` -> [`MemoryStore`][obstore.store.MemoryStore] + - `s3://bucket/path` -> [`S3Store`][obstore.store.S3Store] (also supports `s3a`) + - `gs://bucket/path` -> [`GCSStore`][obstore.store.GCSStore] + - `az://account/container/path` -> [`AzureStore`][obstore.store.AzureStore] (also + supports `adl`, `azure`, `abfs`, `abfss`) + - `http://mydomain/path` -> [`HTTPStore`][obstore.store.HTTPStore] + - `https://mydomain/path` -> [`HTTPStore`][obstore.store.HTTPStore] + + There are also special cases for AWS and Azure for `https://{host?}/path` paths: + + - `dfs.core.windows.net`, `blob.core.windows.net`, `dfs.fabric.microsoft.com`, + `blob.fabric.microsoft.com` -> [`AzureStore`][obstore.store.AzureStore] + - `amazonaws.com` -> [`S3Store`][obstore.store.S3Store] + - `r2.cloudflarestorage.com` -> [`S3Store`][obstore.store.S3Store] + + !!! note + For best static typing, use the constructors on individual store classes + directly. + + Args: + url: well-known storage URL. + + Keyword Args: + config: per-store Configuration. Values in this config will override values + inferred from the url. Defaults to None. + client_options: HTTP Client options. Defaults to None. + retry_config: Retry configuration. Defaults to None. + credential_provider: A callback to provide custom credentials to the underlying + store classes. + kwargs: per-store configuration passed down to store-specific builders. 
+ + """ + scheme = _parse_scheme(url) + if scheme == "s3": + return S3Store.from_url( + url, + config=config, # type: ignore (config narrowing) + client_options=client_options, + retry_config=retry_config, + credential_provider=credential_provider, + **kwargs, + ) + if scheme == "gcs": + return GCSStore.from_url( + url, + config=config, # type: ignore (config narrowing) + client_options=client_options, + retry_config=retry_config, + credential_provider=credential_provider, + **kwargs, + ) + if scheme == "azure": + return AzureStore.from_url( + url, + config=config, # type: ignore (config narrowing) + client_options=client_options, + retry_config=retry_config, + credential_provider=credential_provider, + **kwargs, + ) + if scheme == "http": + if config or kwargs: + msg = "HTTPStore does not accept any configuration" + raise BaseError(msg) + + return HTTPStore.from_url( + url, + client_options=client_options, + retry_config=retry_config, + ) + if scheme == "local": + automatic_cleanup = False + mkdir = False + if "automatic_cleanup" in kwargs: + automatic_cleanup = kwargs.pop("automatic_cleanup") + if "mkdir" in kwargs: + mkdir = kwargs.pop("mkdir") + + return LocalStore.from_url( + url, + automatic_cleanup=automatic_cleanup, + mkdir=mkdir, + ) + if scheme == "memory": + if config or kwargs: + msg = "MemoryStore does not accept any configuration" + raise BaseError(msg) + + return MemoryStore() + + msg = f"Unknown scheme: {url}" + raise BaseError(msg) diff --git a/obstore/src/lib.rs b/obstore/src/lib.rs index 9662bcda..e4606b5e 100644 --- a/obstore/src/lib.rs +++ b/obstore/src/lib.rs @@ -11,6 +11,7 @@ mod list; mod path; mod put; mod rename; +mod scheme; mod signer; mod tags; mod utils; @@ -51,8 +52,8 @@ fn _obstore(py: Python, m: &Bound) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(___version))?; - pyo3_object_store::register_store_module(py, m, "obstore")?; - pyo3_object_store::register_exceptions_module(py, m, "obstore")?; + 
pyo3_object_store::register_store_module(py, m, "obstore", "_store")?; + pyo3_object_store::register_exceptions_module(py, m, "obstore", "exceptions")?; m.add_class::()?; // Set the value of `__module__` correctly on PyBytes @@ -81,6 +82,7 @@ fn _obstore(py: Python, m: &Bound) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(put::put))?; m.add_wrapped(wrap_pyfunction!(rename::rename_async))?; m.add_wrapped(wrap_pyfunction!(rename::rename))?; + m.add_wrapped(wrap_pyfunction!(scheme::parse_scheme))?; m.add_wrapped(wrap_pyfunction!(signer::sign_async))?; m.add_wrapped(wrap_pyfunction!(signer::sign))?; diff --git a/obstore/src/scheme.rs b/obstore/src/scheme.rs new file mode 100644 index 00000000..4e0b5a99 --- /dev/null +++ b/obstore/src/scheme.rs @@ -0,0 +1,19 @@ +use object_store::ObjectStoreScheme; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3_object_store::{PyObjectStoreResult, PyUrl}; + +#[pyfunction] +pub(crate) fn parse_scheme(url: PyUrl) -> PyObjectStoreResult<&'static str> { + let (scheme, _) = + object_store::ObjectStoreScheme::parse(url.as_ref()).map_err(object_store::Error::from)?; + match scheme { + ObjectStoreScheme::AmazonS3 => Ok("s3"), + ObjectStoreScheme::GoogleCloudStorage => Ok("gcs"), + ObjectStoreScheme::Http => Ok("http"), + ObjectStoreScheme::Local => Ok("local"), + ObjectStoreScheme::Memory => Ok("memory"), + ObjectStoreScheme::MicrosoftAzure => Ok("azure"), + _ => Err(PyValueError::new_err(format!("Unknown scheme: {scheme:?}")).into()), + } +} diff --git a/pyo3-object_store/src/api.rs b/pyo3-object_store/src/api.rs index 48330335..4a735035 100644 --- a/pyo3-object_store/src/api.rs +++ b/pyo3-object_store/src/api.rs @@ -14,7 +14,8 @@ use crate::{ /// /// - [`Python`][pyo3::prelude::Python] token /// - parent_module: [`PyModule`][pyo3::prelude::PyModule] object -/// - parent_module_str: the string name of the Python module for how this is exported.
+/// - parent_module_str: the string name of the parent Python module for how this is exported. +/// - sub_module_str: the string name of **this** Python module. Usually `store` but `_store` may be preferred in some cases. /// /// ```notest /// #[pymodule] @@ -41,10 +42,11 @@ pub fn register_store_module( py: Python<'_>, parent_module: &Bound<'_, PyModule>, parent_module_str: &str, + sub_module_str: &str, ) -> PyResult<()> { - let full_module_string = format!("{}.store", parent_module_str); + let full_module_string = format!("{}.{}", parent_module_str, sub_module_str); - let child_module = PyModule::new(parent_module.py(), "store")?; + let child_module = PyModule::new(parent_module.py(), sub_module_str)?; child_module.add_wrapped(wrap_pyfunction!(from_url))?; child_module.add_class::()?; @@ -98,10 +100,11 @@ pub fn register_exceptions_module( py: Python<'_>, parent_module: &Bound<'_, PyModule>, parent_module_str: &str, + sub_module_str: &str, ) -> PyResult<()> { - let full_module_string = format!("{}.exceptions", parent_module_str); + let full_module_string = format!("{}.{}", parent_module_str, sub_module_str); - let child_module = PyModule::new(parent_module.py(), "exceptions")?; + let child_module = PyModule::new(parent_module.py(), sub_module_str)?; child_module.add("BaseError", py.get_type::())?; child_module.add("GenericError", py.get_type::())?; diff --git a/pyo3-object_store/src/aws/store.rs b/pyo3-object_store/src/aws/store.rs index 28316636..1308903c 100644 --- a/pyo3-object_store/src/aws/store.rs +++ b/pyo3-object_store/src/aws/store.rs @@ -60,7 +60,7 @@ impl S3Config { } /// A Python-facing wrapper around an [`AmazonS3`]. -#[pyclass(name = "S3Store", frozen)] +#[pyclass(name = "S3Store", frozen, subclass)] pub struct PyS3Store { store: Arc>, /// A config used for pickling. This must stay in sync with the underlying store's config. 
@@ -131,33 +131,34 @@ impl PyS3Store { #[classmethod] #[pyo3(signature = (url, *, config=None, client_options=None, retry_config=None, credential_provider=None, **kwargs))] pub(crate) fn from_url( - _cls: &Bound, + cls: &Bound, url: PyUrl, config: Option, client_options: Option, retry_config: Option, credential_provider: Option, kwargs: Option, - ) -> PyObjectStoreResult { + ) -> PyObjectStoreResult { // We manually parse the URL to find the prefix because `with_url` does not apply the // prefix. let (_, prefix) = ObjectStoreScheme::parse(url.as_ref()).map_err(object_store::Error::from)?; - let prefix = if prefix.parts().count() != 0 { + let prefix: Option = if prefix.parts().count() != 0 { Some(prefix.into()) } else { None }; let config = parse_url(config, url.as_ref())?; - Self::new( - None, - prefix, - Some(config), - client_options, - retry_config, - credential_provider, - kwargs, - ) + + // Note: we pass **back** through Python so that if cls is a subclass, we instantiate the + // subclass + let kwargs = kwargs.unwrap_or_default().into_pyobject(cls.py())?; + kwargs.set_item("prefix", prefix)?; + kwargs.set_item("config", config)?; + kwargs.set_item("client_options", client_options)?; + kwargs.set_item("retry_config", retry_config)?; + kwargs.set_item("credential_provider", credential_provider)?; + Ok(cls.call((), Some(&kwargs))?.unbind()) } fn __getnewargs_ex__(&self, py: Python) -> PyResult { diff --git a/pyo3-object_store/src/azure/store.rs b/pyo3-object_store/src/azure/store.rs index 6d7fee9e..109f675b 100644 --- a/pyo3-object_store/src/azure/store.rs +++ b/pyo3-object_store/src/azure/store.rs @@ -57,7 +57,7 @@ impl AzureConfig { } /// A Python-facing wrapper around a [`MicrosoftAzure`]. -#[pyclass(name = "AzureStore", frozen)] +#[pyclass(name = "AzureStore", frozen, subclass)] pub struct PyAzureStore { store: Arc>, /// A config used for pickling. This must stay in sync with the underlying store's config. 
@@ -124,33 +124,34 @@ impl PyAzureStore { #[classmethod] #[pyo3(signature = (url, *, config=None, client_options=None, retry_config=None, credential_provider=None, **kwargs))] pub(crate) fn from_url( - _cls: &Bound, + cls: &Bound, url: PyUrl, config: Option, client_options: Option, retry_config: Option, credential_provider: Option, kwargs: Option, - ) -> PyObjectStoreResult { + ) -> PyObjectStoreResult { // We manually parse the URL to find the prefix because `parse_url` does not apply the // prefix. let (_, prefix) = ObjectStoreScheme::parse(url.as_ref()).map_err(object_store::Error::from)?; - let prefix = if prefix.parts().count() != 0 { + let prefix: Option = if prefix.parts().count() != 0 { Some(prefix.into()) } else { None }; let config = parse_url(config, url.as_ref())?; - Self::new( - None, - prefix, - Some(config), - client_options, - retry_config, - credential_provider, - kwargs, - ) + + // Note: we pass **back** through Python so that if cls is a subclass, we instantiate the + // subclass + let kwargs = kwargs.unwrap_or_default().into_pyobject(cls.py())?; + kwargs.set_item("prefix", prefix)?; + kwargs.set_item("config", config)?; + kwargs.set_item("client_options", client_options)?; + kwargs.set_item("retry_config", retry_config)?; + kwargs.set_item("credential_provider", credential_provider)?; + Ok(cls.call((), Some(&kwargs))?.unbind()) } fn __getnewargs_ex__(&self, py: Python) -> PyResult { diff --git a/pyo3-object_store/src/gcp/store.rs b/pyo3-object_store/src/gcp/store.rs index e5b8bf42..7dcebf5f 100644 --- a/pyo3-object_store/src/gcp/store.rs +++ b/pyo3-object_store/src/gcp/store.rs @@ -57,7 +57,7 @@ impl GCSConfig { } /// A Python-facing wrapper around a [`GoogleCloudStorage`]. -#[pyclass(name = "GCSStore", frozen)] +#[pyclass(name = "GCSStore", frozen, subclass)] pub struct PyGCSStore { store: Arc>, /// A config used for pickling. This must stay in sync with the underlying store's config. 
@@ -124,33 +124,34 @@ impl PyGCSStore { #[classmethod] #[pyo3(signature = (url, *, config=None, client_options=None, retry_config=None, credential_provider=None, **kwargs))] pub(crate) fn from_url( - _cls: &Bound, + cls: &Bound, url: PyUrl, config: Option, client_options: Option, retry_config: Option, credential_provider: Option, kwargs: Option, - ) -> PyObjectStoreResult { + ) -> PyObjectStoreResult { // We manually parse the URL to find the prefix because `parse_url` does not apply the // prefix. let (_, prefix) = ObjectStoreScheme::parse(url.as_ref()).map_err(object_store::Error::from)?; - let prefix = if prefix.parts().count() != 0 { + let prefix: Option = if prefix.parts().count() != 0 { Some(prefix.into()) } else { None }; let config = parse_url(config, url.as_ref())?; - Self::new( - None, - prefix, - Some(config), - client_options, - retry_config, - credential_provider, - kwargs, - ) + + // Note: we pass **back** through Python so that if cls is a subclass, we instantiate the + // subclass + let kwargs = kwargs.unwrap_or_default().into_pyobject(cls.py())?; + kwargs.set_item("prefix", prefix)?; + kwargs.set_item("config", config)?; + kwargs.set_item("client_options", client_options)?; + kwargs.set_item("retry_config", retry_config)?; + kwargs.set_item("credential_provider", credential_provider)?; + Ok(cls.call((), Some(&kwargs))?.unbind()) } fn __getnewargs_ex__(&self, py: Python) -> PyResult { diff --git a/pyo3-object_store/src/http.rs b/pyo3-object_store/src/http.rs index d0fd71ff..aecde704 100644 --- a/pyo3-object_store/src/http.rs +++ b/pyo3-object_store/src/http.rs @@ -32,7 +32,7 @@ impl HTTPConfig { } /// A Python-facing wrapper around a [`HttpStore`]. -#[pyclass(name = "HTTPStore", frozen)] +#[pyclass(name = "HTTPStore", frozen, subclass)] pub struct PyHttpStore { // Note: we don't need to wrap this in a MaybePrefixedStore because the HttpStore manages its // own prefix. 
@@ -83,12 +83,19 @@ impl PyHttpStore { #[classmethod] #[pyo3(signature = (url, *, client_options=None, retry_config=None))] pub(crate) fn from_url( - _cls: &Bound, + cls: &Bound, + py: Python, url: PyUrl, client_options: Option, retry_config: Option, - ) -> PyObjectStoreResult { - Self::new(url, client_options, retry_config) + ) -> PyObjectStoreResult { + // Note: we pass **back** through Python so that if cls is a subclass, we instantiate the + // subclass + let kwargs = PyDict::new(py); + kwargs.set_item("url", url)?; + kwargs.set_item("client_options", client_options)?; + kwargs.set_item("retry_config", retry_config)?; + Ok(cls.call((), Some(&kwargs))?.unbind()) } fn __getnewargs_ex__(&self, py: Python) -> PyResult { diff --git a/pyo3-object_store/src/local.rs b/pyo3-object_store/src/local.rs index 6fca617f..36a43e51 100644 --- a/pyo3-object_store/src/local.rs +++ b/pyo3-object_store/src/local.rs @@ -29,7 +29,7 @@ impl LocalConfig { } /// A Python-facing wrapper around a [`LocalFileSystem`]. -#[pyclass(name = "LocalStore", frozen)] +#[pyclass(name = "LocalStore", frozen, subclass)] pub struct PyLocalStore { store: Arc, config: LocalConfig, @@ -52,7 +52,7 @@ impl PyLocalStore { impl PyLocalStore { #[new] #[pyo3(signature = (prefix=None, *, automatic_cleanup=false, mkdir=false))] - fn py_new( + fn new( prefix: Option, automatic_cleanup: bool, mkdir: bool, @@ -79,11 +79,11 @@ impl PyLocalStore { #[classmethod] #[pyo3(signature = (url, *, automatic_cleanup=false, mkdir=false))] pub(crate) fn from_url( - _cls: &Bound, + cls: &Bound, url: PyUrl, automatic_cleanup: bool, mkdir: bool, - ) -> PyObjectStoreResult { + ) -> PyObjectStoreResult { let url = url.into_inner(); let (scheme, path) = ObjectStoreScheme::parse(&url).map_err(object_store::Error::from)?; @@ -96,7 +96,14 @@ impl PyLocalStore { // Hopefully this also works on Windows. 
let root = std::path::Path::new("/"); let full_path = root.join(path.as_ref()); - Self::py_new(Some(full_path), automatic_cleanup, mkdir) + + // Note: we pass **back** through Python so that if cls is a subclass, we instantiate the + // subclass + let kwargs = PyDict::new(cls.py()); + kwargs.set_item("prefix", full_path)?; + kwargs.set_item("automatic_cleanup", automatic_cleanup)?; + kwargs.set_item("mkdir", mkdir)?; + Ok(cls.call((), Some(&kwargs))?.unbind()) } fn __getnewargs_ex__(&self, py: Python) -> PyResult { diff --git a/pyo3-object_store/src/memory.rs b/pyo3-object_store/src/memory.rs index 996220e7..e826b17f 100644 --- a/pyo3-object_store/src/memory.rs +++ b/pyo3-object_store/src/memory.rs @@ -6,7 +6,7 @@ use pyo3::prelude::*; use pyo3::types::PyString; /// A Python-facing wrapper around an [`InMemory`]. -#[pyclass(name = "MemoryStore", frozen)] +#[pyclass(name = "MemoryStore", frozen, subclass)] pub struct PyMemoryStore(Arc); impl AsRef> for PyMemoryStore { diff --git a/pyo3-object_store/src/simple.rs b/pyo3-object_store/src/simple.rs index dbefa6b9..9f9a99c9 100644 --- a/pyo3-object_store/src/simple.rs +++ b/pyo3-object_store/src/simple.rs @@ -71,6 +71,7 @@ pub fn from_url( raise_if_config_passed(config, kwargs, "http")?; let store = PyHttpStore::from_url( &PyType::new::(py), + py, url, client_options, retry_config, diff --git a/pyproject.toml b/pyproject.toml index f496aa0a..0adeec6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dev-dependencies = [ "mkdocstrings-python>=1.13.0", "mkdocstrings>=0.27.0", "moto[s3,server]>=5.0.18", - "obspec>=0.1.0-beta.1", + "obspec>=0.1.0b1", "pandas>=2.2.3", "pip>=24.2", "pyarrow>=17.0.0", @@ -29,6 +29,7 @@ dev-dependencies = [ "pytest>=8.3.3", "ruff>=0.8.4", "types-boto3[sts]>=1.36.23", + "polars>=1.24.0", ] [tool.uv.workspace] diff --git a/tests/store/test_local.py b/tests/store/test_local.py index db18c473..ae434cf1 100644 --- a/tests/store/test_local.py +++ b/tests/store/test_local.py @@ -1,5 
+1,4 @@ import pickle -import tempfile from pathlib import Path import pytest @@ -46,8 +45,8 @@ def test_local_from_url(): store = LocalStore.from_url(url) -def test_create_prefix(): - tmpdir = Path(tempfile.gettempdir()) / "abc" +def test_create_prefix(tmp_path: Path): + tmpdir = tmp_path / "abc" assert not tmpdir.exists() LocalStore(tmpdir, mkdir=True) assert tmpdir.exists() @@ -57,18 +56,16 @@ def test_create_prefix(): assert tmpdir.exists() -def test_prefix_property(): - tmpdir = Path(tempfile.gettempdir()) - store = LocalStore(tmpdir) - assert store.prefix == tmpdir +def test_prefix_property(tmp_path: Path): + store = LocalStore(tmp_path) + assert store.prefix == tmp_path assert isinstance(store.prefix, Path) # Can pass it back to the store init LocalStore(store.prefix) -def test_pickle(): - tmpdir = Path(tempfile.gettempdir()) - store = LocalStore(tmpdir) +def test_pickle(tmp_path: Path): + store = LocalStore(tmp_path) obs.put(store, "path.txt", b"foo") new_store: LocalStore = pickle.loads(pickle.dumps(store)) assert obs.get(new_store, "path.txt").bytes() == b"foo" diff --git a/tests/test_delete.py b/tests/test_delete.py index b5cc2738..b51adf55 100644 --- a/tests/test_delete.py +++ b/tests/test_delete.py @@ -9,15 +9,15 @@ def test_delete_one(): store = MemoryStore() - obs.put(store, "file1.txt", b"foo") - obs.put(store, "file2.txt", b"bar") - obs.put(store, "file3.txt", b"baz") + store.put("file1.txt", b"foo") + store.put("file2.txt", b"bar") + store.put("file3.txt", b"baz") - assert len(obs.list(store).collect()) == 3 - obs.delete(store, "file1.txt") - obs.delete(store, "file2.txt") - obs.delete(store, "file3.txt") - assert len(obs.list(store).collect()) == 0 + assert len(store.list().collect()) == 3 + store.delete("file1.txt") + store.delete("file2.txt") + store.delete("file3.txt") + assert len(store.list().collect()) == 0 @pytest.mark.asyncio @@ -32,16 +32,16 @@ async def test_delete_async(): def test_delete_many(): store = MemoryStore() - obs.put(store, 
"file1.txt", b"foo") - obs.put(store, "file2.txt", b"bar") - obs.put(store, "file3.txt", b"baz") + store.put("file1.txt", b"foo") + store.put("file2.txt", b"bar") + store.put("file3.txt", b"baz") - assert len(obs.list(store).collect()) == 3 + assert len(store.list().collect()) == 3 obs.delete( store, ["file1.txt", "file2.txt", "file3.txt"], ) - assert len(obs.list(store).collect()) == 0 + assert len(store.list().collect()) == 0 # Local filesystem errors if the file does not exist. @@ -49,15 +49,15 @@ def test_delete_one_local_fs(): with TemporaryDirectory() as tmpdir: store = LocalStore(tmpdir) - obs.put(store, "file1.txt", b"foo") - obs.put(store, "file2.txt", b"bar") - obs.put(store, "file3.txt", b"baz") + store.put("file1.txt", b"foo") + store.put("file2.txt", b"bar") + store.put("file3.txt", b"baz") - assert len(obs.list(store).collect()) == 3 + assert len(store.list().collect()) == 3 obs.delete(store, "file1.txt") obs.delete(store, "file2.txt") obs.delete(store, "file3.txt") - assert len(obs.list(store).collect()) == 0 + assert len(store.list().collect()) == 0 with pytest.raises(FileNotFoundError): obs.delete(store, "file1.txt") @@ -67,11 +67,11 @@ def test_delete_many_local_fs(): with TemporaryDirectory() as tmpdir: store = LocalStore(tmpdir) - obs.put(store, "file1.txt", b"foo") - obs.put(store, "file2.txt", b"bar") - obs.put(store, "file3.txt", b"baz") + store.put("file1.txt", b"foo") + store.put("file2.txt", b"bar") + store.put("file3.txt", b"baz") - assert len(obs.list(store).collect()) == 3 + assert len(store.list().collect()) == 3 obs.delete( store, ["file1.txt", "file2.txt", "file3.txt"], diff --git a/tests/test_get.py b/tests/test_get.py index 351670e0..50ce4de7 100644 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -1,6 +1,5 @@ import pytest -import obstore as obs from obstore.store import MemoryStore @@ -10,8 +9,8 @@ def test_stream_sync(): data = b"the quick brown fox jumps over the lazy dog," * 5000 path = "big-data.txt" - obs.put(store, path, 
data) - resp = obs.get(store, path) + store.put(path, data) + resp = store.get(path) stream = resp.stream(min_chunk_size=0) # Note: it looks from manual testing that with the local store we're only getting @@ -32,8 +31,8 @@ async def test_stream_async(): data = b"the quick brown fox jumps over the lazy dog," * 5000 path = "big-data.txt" - await obs.put_async(store, path, data) - resp = await obs.get_async(store, path) + await store.put_async(path, data) + resp = await store.get_async(path) stream = resp.stream(min_chunk_size=0) # Note: it looks from manual testing that with the local store we're only getting @@ -53,15 +52,15 @@ def test_get_with_options(): data = b"the quick brown fox jumps over the lazy dog," * 100 path = "big-data.txt" - obs.put(store, path, data) + store.put(path, data) - result = obs.get(store, path, options={"range": (5, 10)}) + result = store.get(path, options={"range": (5, 10)}) assert result.range == (5, 10) buf = result.bytes() assert buf == data[5:10] # Test list input - result = obs.get(store, path, options={"range": [5, 10]}) + result = store.get(path, options={"range": [5, 10]}) assert result.range == (5, 10) buf = result.bytes() assert buf == data[5:10] @@ -73,9 +72,9 @@ def test_get_with_options_offset(): data = b"the quick brown fox jumps over the lazy dog," * 100 path = "big-data.txt" - obs.put(store, path, data) + store.put(path, data) - result = obs.get(store, path, options={"range": {"offset": 100}}) + result = store.get(path, options={"range": {"offset": 100}}) result_range = result.range assert result_range == (100, 4400) buf = result.bytes() @@ -88,9 +87,9 @@ def test_get_with_options_suffix(): data = b"the quick brown fox jumps over the lazy dog," * 100 path = "big-data.txt" - obs.put(store, path, data) + store.put(path, data) - result = obs.get(store, path, options={"range": {"suffix": 100}}) + result = store.get(path, options={"range": {"suffix": 100}}) result_range = result.range assert result_range == (4300, 4400) buf = 
result.bytes() @@ -103,12 +102,12 @@ def test_get_range(): data = b"the quick brown fox jumps over the lazy dog," * 100 path = "big-data.txt" - obs.put(store, path, data) - buffer = obs.get_range(store, path, start=5, end=15) + store.put(path, data) + buffer = store.get_range(path, start=5, end=15) view = memoryview(buffer) assert view == data[5:15] - buffer = obs.get_range(store, path, start=5, length=10) + buffer = store.get_range(path, start=5, length=10) view = memoryview(buffer) assert view == data[5:15] @@ -119,17 +118,17 @@ def test_get_ranges(): data = b"the quick brown fox jumps over the lazy dog," * 100 path = "big-data.txt" - obs.put(store, path, data) + store.put(path, data) starts = [5, 10, 15, 20] ends = [15, 20, 25, 30] - buffers = obs.get_ranges(store, path, starts=starts, ends=ends) + buffers = store.get_ranges(path, starts=starts, ends=ends) for start, end, buffer in zip(starts, ends, buffers, strict=True): assert memoryview(buffer) == data[start:end] lengths = [10, 10, 10, 10] - buffers = obs.get_ranges(store, path, starts=starts, lengths=lengths) + buffers = store.get_ranges(path, starts=starts, lengths=lengths) for start, end, buffer in zip(starts, ends, buffers, strict=True): assert memoryview(buffer) == data[start:end] @@ -140,16 +139,16 @@ def test_get_range_invalid_range(): data = b"the quick brown fox jumps over the lazy dog," * 100 path = "big-data.txt" - obs.put(store, path, data) + store.put(path, data) with pytest.raises(ValueError, match="Invalid range"): - obs.get_range(store, path, start=10, end=10) + store.get_range(path, start=10, end=10) with pytest.raises(ValueError, match="Invalid range"): - obs.get_range(store, path, start=10, end=8) + store.get_range(path, start=10, end=8) with pytest.raises(ValueError, match="Invalid range"): - obs.get_range(store, path, start=10, length=0) + store.get_range(path, start=10, length=0) def test_get_ranges_invalid_range(): @@ -157,13 +156,13 @@ def test_get_ranges_invalid_range(): data = b"the 
quick brown fox jumps over the lazy dog," * 100 path = "big-data.txt" - obs.put(store, path, data) + store.put(path, data) with pytest.raises(ValueError, match="Invalid range"): - obs.get_ranges(store, path, starts=[10], ends=[10]) + store.get_ranges(path, starts=[10], ends=[10]) with pytest.raises(ValueError, match="Invalid range"): - obs.get_ranges(store, path, starts=[10, 20], ends=[18, 18]) + store.get_ranges(path, starts=[10, 20], ends=[18, 18]) with pytest.raises(ValueError, match="Invalid range"): - obs.get_ranges(store, path, starts=[10, 20], lengths=[10, 0]) + store.get_ranges(path, starts=[10, 20], lengths=[10, 0]) diff --git a/tests/test_list.py b/tests/test_list.py index 88784a16..f69884c9 100644 --- a/tests/test_list.py +++ b/tests/test_list.py @@ -1,18 +1,20 @@ +import pandas as pd +import polars as pl +import pyarrow as pa import pytest from arro3.core import RecordBatch, Table -import obstore as obs from obstore.store import MemoryStore def test_list(): store = MemoryStore() - obs.put(store, "file1.txt", b"foo") - obs.put(store, "file2.txt", b"bar") - obs.put(store, "file3.txt", b"baz") + store.put("file1.txt", b"foo") + store.put("file2.txt", b"bar") + store.put("file3.txt", b"baz") - stream = obs.list(store) + stream = store.list() result = stream.collect() assert len(result) == 3 @@ -21,9 +23,9 @@ def test_list_as_arrow(): store = MemoryStore() for i in range(100): - obs.put(store, f"file{i}.txt", b"foo") + store.put(f"file{i}.txt", b"foo") - stream = obs.list(store, return_arrow=True, chunk_size=10) + stream = store.list(return_arrow=True, chunk_size=10) yielded_batches = 0 for batch in stream: assert isinstance(batch, RecordBatch) @@ -32,7 +34,7 @@ def test_list_as_arrow(): assert yielded_batches == 10 - stream = obs.list(store, return_arrow=True, chunk_size=10) + stream = store.list(return_arrow=True, chunk_size=10) batch = stream.collect() assert isinstance(batch, RecordBatch) assert batch.num_rows == 100 @@ -43,9 +45,9 @@ async def 
test_list_stream_async(): store = MemoryStore() for i in range(100): - await obs.put_async(store, f"file{i}.txt", b"foo") + await store.put_async(f"file{i}.txt", b"foo") - stream = obs.list(store, return_arrow=True, chunk_size=10) + stream = store.list(return_arrow=True, chunk_size=10) yielded_batches = 0 async for batch in stream: assert isinstance(batch, RecordBatch) @@ -54,7 +56,7 @@ async def test_list_stream_async(): assert yielded_batches == 10 - stream = obs.list(store, return_arrow=True, chunk_size=10) + stream = store.list(return_arrow=True, chunk_size=10) batch = await stream.collect_async() assert isinstance(batch, RecordBatch) assert batch.num_rows == 100 @@ -63,65 +65,78 @@ async def test_list_stream_async(): def test_list_with_delimiter(): store = MemoryStore() - obs.put(store, "a/file1.txt", b"foo") - obs.put(store, "a/file2.txt", b"bar") - obs.put(store, "b/file3.txt", b"baz") + store.put("a/file1.txt", b"foo") + store.put("a/file2.txt", b"bar") + store.put("b/file3.txt", b"baz") - list_result1 = obs.list_with_delimiter(store) + list_result1 = store.list_with_delimiter() assert list_result1["common_prefixes"] == ["a", "b"] assert list_result1["objects"] == [] - list_result2 = obs.list_with_delimiter(store, "a") + list_result2 = store.list_with_delimiter("a") assert list_result2["common_prefixes"] == [] assert list_result2["objects"][0]["path"] == "a/file1.txt" assert list_result2["objects"][1]["path"] == "a/file2.txt" - list_result3 = obs.list_with_delimiter(store, "b") + list_result3 = store.list_with_delimiter("b") assert list_result3["common_prefixes"] == [] assert list_result3["objects"][0]["path"] == "b/file3.txt" # Test returning arrow - list_result1 = obs.list_with_delimiter(store, return_arrow=True) + list_result1 = store.list_with_delimiter(return_arrow=True) assert list_result1["common_prefixes"] == ["a", "b"] - assert list_result1["objects"].num_rows == 0 + assert Table(list_result1["objects"]).num_rows == 0 assert 
isinstance(list_result1["objects"], Table) - list_result2 = obs.list_with_delimiter(store, "a", return_arrow=True) + list_result2 = store.list_with_delimiter("a", return_arrow=True) assert list_result2["common_prefixes"] == [] - assert list_result2["objects"].num_rows == 2 - assert list_result2["objects"]["path"][0].as_py() == "a/file1.txt" - assert list_result2["objects"]["path"][1].as_py() == "a/file2.txt" + objects = Table(list_result2["objects"]) + assert objects.num_rows == 2 + assert objects["path"][0].as_py() == "a/file1.txt" + assert objects["path"][1].as_py() == "a/file2.txt" @pytest.mark.asyncio async def test_list_with_delimiter_async(): store = MemoryStore() - await obs.put_async(store, "a/file1.txt", b"foo") - await obs.put_async(store, "a/file2.txt", b"bar") - await obs.put_async(store, "b/file3.txt", b"baz") + await store.put_async("a/file1.txt", b"foo") + await store.put_async("a/file2.txt", b"bar") + await store.put_async("b/file3.txt", b"baz") - list_result1 = await obs.list_with_delimiter_async(store) + list_result1 = await store.list_with_delimiter_async() assert list_result1["common_prefixes"] == ["a", "b"] assert list_result1["objects"] == [] - list_result2 = await obs.list_with_delimiter_async(store, "a") + list_result2 = await store.list_with_delimiter_async("a") assert list_result2["common_prefixes"] == [] assert list_result2["objects"][0]["path"] == "a/file1.txt" assert list_result2["objects"][1]["path"] == "a/file2.txt" - list_result3 = await obs.list_with_delimiter_async(store, "b") + list_result3 = await store.list_with_delimiter_async("b") assert list_result3["common_prefixes"] == [] assert list_result3["objects"][0]["path"] == "b/file3.txt" # Test returning arrow - list_result1 = await obs.list_with_delimiter_async(store, return_arrow=True) + list_result1 = await store.list_with_delimiter_async(return_arrow=True) assert list_result1["common_prefixes"] == ["a", "b"] - assert list_result1["objects"].num_rows == 0 + assert 
Table(list_result1["objects"]).num_rows == 0 assert isinstance(list_result1["objects"], Table) - list_result2 = await obs.list_with_delimiter_async(store, "a", return_arrow=True) + list_result2 = await store.list_with_delimiter_async("a", return_arrow=True) assert list_result2["common_prefixes"] == [] - assert list_result2["objects"].num_rows == 2 - assert list_result2["objects"]["path"][0].as_py() == "a/file1.txt" - assert list_result2["objects"]["path"][1].as_py() == "a/file2.txt" + objects = Table(list_result2["objects"]) + assert objects.num_rows == 2 + assert objects["path"][0].as_py() == "a/file1.txt" + assert objects["path"][1].as_py() == "a/file2.txt" + + +def test_list_as_arrow_to_polars(): + store = MemoryStore() + + for i in range(100): + store.put(f"file{i}.txt", b"foo") + + stream = store.list(return_arrow=True, chunk_size=10) + _pl_df = pl.DataFrame(next(stream)) + _df = pa.record_batch(next(stream)).to_pandas(types_mapper=pd.ArrowDtype) diff --git a/tests/test_obspec.py b/tests/test_obspec.py new file mode 100644 index 00000000..b1581534 --- /dev/null +++ b/tests/test_obspec.py @@ -0,0 +1,66 @@ +from arro3.core import RecordBatch, Table + +# TODO: fix imports +from obspec._get import Get, GetAsync +from obspec._list import List, ListWithDelimiter, ListWithDelimiterAsync +from obspec._put import Put, PutAsync + +from obstore.store import MemoryStore + + +def store(): + return MemoryStore() + + +def accepts_get(store: Get): + store.get("path") + + +async def accepts_get_async(store: GetAsync): + await store.get_async("path") + + +def accepts_list(store: List): + objects = store.list().collect() + assert isinstance(objects, list) + + objects = next(store.list(return_arrow=True)) + _rb = RecordBatch(objects) + + +def accepts_list_with_delimiter(store: ListWithDelimiter): + objects = store.list_with_delimiter() + assert isinstance(objects["objects"], list) + + objects = store.list_with_delimiter(return_arrow=True) + _rb = Table(objects["objects"]) + + 
+async def accepts_list_with_delimiter_async(store: ListWithDelimiterAsync): + objects = await store.list_with_delimiter_async() + assert isinstance(objects["objects"], list) + + objects = await store.list_with_delimiter_async(return_arrow=True) + _rb = Table(objects["objects"]) + + +def accepts_put(store: Put): + store.put("path", b"content") + + +async def accepts_put_async(store: PutAsync): + await store.put_async("path", b"content") + + +async def test_typing(): + store = MemoryStore() + + # Make sure to put "put" first + accepts_put(store) + await accepts_put_async(store) + + accepts_get(store) + await accepts_get_async(store) + accepts_list(store) + accepts_list_with_delimiter(store) + await accepts_list_with_delimiter_async(store) diff --git a/tests/test_put.py b/tests/test_put.py index 795fcccf..79a2bc48 100644 --- a/tests/test_put.py +++ b/tests/test_put.py @@ -2,7 +2,6 @@ import pytest -import obstore as obs from obstore.exceptions import AlreadyExistsError from obstore.store import MemoryStore @@ -10,8 +9,8 @@ def test_put_non_multipart(): store = MemoryStore() - obs.put(store, "file1.txt", b"foo", use_multipart=False) - assert obs.get(store, "file1.txt").bytes() == b"foo" + store.put("file1.txt", b"foo", use_multipart=False) + assert store.get("file1.txt").bytes() == b"foo" def test_put_non_multipart_sync_iterable(): @@ -19,8 +18,8 @@ def test_put_non_multipart_sync_iterable(): b = b"the quick brown fox jumps over the lazy dog," iterator = itertools.repeat(b, 5) - obs.put(store, "file1.txt", iterator, use_multipart=False) - assert obs.get(store, "file1.txt").bytes() == (b * 5) + store.put("file1.txt", iterator, use_multipart=False) + assert store.get("file1.txt").bytes() == (b * 5) @pytest.mark.asyncio @@ -33,15 +32,15 @@ async def it(): for _ in range(5): yield b"the quick brown fox jumps over the lazy dog," - await obs.put_async(store, "file1.txt", it(), use_multipart=False) - assert obs.get(store, "file1.txt").bytes() == (b * 5) + await 
store.put_async("file1.txt", it(), use_multipart=False) + assert store.get("file1.txt").bytes() == (b * 5) def test_put_multipart_one_chunk(): store = MemoryStore() - obs.put(store, "file1.txt", b"foo", use_multipart=True) - assert obs.get(store, "file1.txt").bytes() == b"foo" + store.put("file1.txt", b"foo", use_multipart=True) + assert store.get("file1.txt").bytes() == b"foo" def test_put_multipart_large(): @@ -50,20 +49,20 @@ def test_put_multipart_large(): data = b"the quick brown fox jumps over the lazy dog," * 5000 path = "big-data.txt" - obs.put(store, path, data, use_multipart=True) - assert obs.get(store, path).bytes() == data + store.put(path, data, use_multipart=True) + assert store.get(path).bytes() == data def test_put_mode(): store = MemoryStore() - obs.put(store, "file1.txt", b"foo") - obs.put(store, "file1.txt", b"bar", mode="overwrite") + store.put("file1.txt", b"foo") + store.put("file1.txt", b"bar", mode="overwrite") with pytest.raises(AlreadyExistsError): - obs.put(store, "file1.txt", b"foo", mode="create") + store.put("file1.txt", b"foo", mode="create") - assert obs.get(store, "file1.txt").bytes() == b"bar" + assert store.get("file1.txt").bytes() == b"bar" @pytest.mark.asyncio @@ -73,14 +72,14 @@ async def test_put_async_iterable(): data = b"the quick brown fox jumps over the lazy dog," * 50_000 path = "big-data.txt" - await obs.put_async(store, path, data) + await store.put_async(path, data) - resp = await obs.get_async(store, path) + resp = await store.get_async(path) stream = resp.stream(min_chunk_size=0) new_path = "new-path.txt" - await obs.put_async(store, new_path, stream) + await store.put_async(new_path, stream) - assert obs.get(store, new_path).bytes() == data + assert store.get(new_path).bytes() == data def test_put_sync_iterable(): @@ -91,6 +90,6 @@ def test_put_sync_iterable(): data = b * 50_000 path = "big-data.txt" - obs.put(store, path, iterator) + store.put(path, iterator) - assert obs.get(store, path).bytes() == data + assert 
store.get(path).bytes() == data diff --git a/uv.lock b/uv.lock index 93ad07cc..73b6d378 100644 --- a/uv.lock +++ b/uv.lock @@ -1386,6 +1386,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567 }, ] +[[package]] +name = "polars" +version = "1.24.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/d3/453bcecbe14a5aba6be47c99d81f4e1f941d3de729d5e0ce5c7d527c05ed/polars-1.24.0.tar.gz", hash = "sha256:6e7553789495081c998f5e4ad4ebc7e19e970a9cc83326d40461564e85ad226d", size = 4446066 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/3f/16f87d9ec4707d717a434bc54307506594522de99fdfe3d5d76233912c94/polars-1.24.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:563b99a6597fe77a3c89d478e4a6fb49c063f44ef84d4adefe490e14626e2f99", size = 33792674 }, + { url = "https://files.pythonhosted.org/packages/35/cd/27353d0b9331d60a95f5708370441348d4a3af0f609961ceaaa3b583190f/polars-1.24.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:6ad64d938d60b7fda39b60892ef67bc6a9942e0c7170db593a65d019e8730b09", size = 30469541 }, + { url = "https://files.pythonhosted.org/packages/a7/db/668d8328b1c3d8381023fc4ed905a88b93cca041c088f42a94dbd6822469/polars-1.24.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:331e737465b8d954bec51e6906bdc6e979a6ee52f97ffe5e8d0c10794a46bfd9", size = 34313540 }, + { url = "https://files.pythonhosted.org/packages/12/16/f95207616b2e802c381459cf01f0d233daa98bdc4e394ec88044af9e927f/polars-1.24.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:3c6c774aebdd5cd601839594986648789352f72b8893f4b7e34224e75b060c8d", size = 31490266 }, + { url = 
"https://files.pythonhosted.org/packages/8b/82/cb0512747ec5508a4f840a521feb27f28a81eac1aef6c92fc25643073579/polars-1.24.0-cp39-abi3-win_amd64.whl", hash = "sha256:a5a473ff44fe1b9e3e7a9013a9321efe841d858e89cf33d424e6f3fef3ea4d5e", size = 34678593 }, + { url = "https://files.pythonhosted.org/packages/67/00/db3810803938467a215c1f161ff21ad6fef581d5ac1381ee2990d0180c19/polars-1.24.0-cp39-abi3-win_arm64.whl", hash = "sha256:5ea781ca8e0a39c3b677171dbd852e5fa2d5c53417b5fbd69d711b6044a49eaa", size = 30892251 }, +] + [[package]] name = "prompt-toolkit" version = "3.0.48" @@ -2020,6 +2034,7 @@ dev = [ { name = "obspec" }, { name = "pandas" }, { name = "pip" }, + { name = "polars" }, { name = "pyarrow" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -2044,9 +2059,10 @@ dev = [ { name = "mkdocstrings", specifier = ">=0.27.0" }, { name = "mkdocstrings-python", specifier = ">=1.13.0" }, { name = "moto", extras = ["s3", "server"], specifier = ">=5.0.18" }, - { name = "obspec", specifier = "==0.1.0b1" }, + { name = "obspec", specifier = ">=0.1.0b1" }, { name = "pandas", specifier = ">=2.2.3" }, { name = "pip", specifier = ">=24.2" }, + { name = "polars", specifier = ">=1.24.0" }, { name = "pyarrow", specifier = ">=17.0.0" }, { name = "pytest", specifier = ">=8.3.3" }, { name = "pytest-asyncio", specifier = ">=0.24.0" },