Skip to content

Add class methods wrapper #331

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Mar 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions docs/api/list.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,3 @@
::: obstore.list
::: obstore.list_with_delimiter
::: obstore.list_with_delimiter_async
::: obstore.ListResult
::: obstore.ListStream
::: obstore.ListChunkType
3 changes: 3 additions & 0 deletions docs/api/store/aws.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# AWS S3

::: obstore.store.S3Store
options:
inherited_members: true
show_bases: false
::: obstore.store.S3Config
options:
show_if_no_docstring: true
Expand Down
3 changes: 3 additions & 0 deletions docs/api/store/azure.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Microsoft Azure

::: obstore.store.AzureStore
options:
inherited_members: true
show_bases: false
::: obstore.store.AzureConfig
options:
show_if_no_docstring: true
Expand Down
3 changes: 3 additions & 0 deletions docs/api/store/gcs.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Google Cloud Storage

::: obstore.store.GCSStore
options:
inherited_members: true
show_bases: false
::: obstore.store.GCSConfig
options:
show_if_no_docstring: true
Expand Down
3 changes: 3 additions & 0 deletions docs/api/store/http.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# HTTP

::: obstore.store.HTTPStore
options:
inherited_members: true
show_bases: false
3 changes: 3 additions & 0 deletions docs/api/store/local.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Local

::: obstore.store.LocalStore
options:
inherited_members: true
show_bases: false
3 changes: 3 additions & 0 deletions docs/api/store/memory.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Memory

::: obstore.store.MemoryStore
options:
inherited_members: true
show_bases: false
13 changes: 13 additions & 0 deletions docs/cookbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,19 @@ store = ... # store of your choice
# Get a stream of Arrow RecordBatches of metadata
list_stream = obs.list(store, prefix="data", return_arrow=True)
for record_batch in list_stream:
# Perform zero-copy conversion to your arrow-backed library of choice
#
# To pyarrow:
# pyarrow.record_batch(record_batch)
#
# To polars:
# polars.DataFrame(record_batch)
#
# To pandas (with Arrow-backed data-types):
# pyarrow.record_batch(record_batch).to_pandas(types_mapper=pd.ArrowDtype)
#
# To arro3:
# arro3.core.RecordBatch(record_batch)
print(record_batch.num_rows)
```

Expand Down
49 changes: 45 additions & 4 deletions obstore/python/obstore/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,50 @@
from typing import TYPE_CHECKING

from ._obstore import *
from ._obstore import ___version
from . import store
from ._obstore import (
Bytes,
___version,
copy,
copy_async,
delete,
delete_async,
get,
get_async,
get_range,
get_range_async,
get_ranges,
get_ranges_async,
head,
head_async,
list, # noqa: A004
list_with_delimiter,
list_with_delimiter_async,
open_reader,
open_reader_async,
open_writer,
open_writer_async,
put,
put_async,
rename,
rename_async,
sign,
sign_async,
)

if TYPE_CHECKING:
from . import exceptions, store

from . import _store, exceptions
from ._obstore import (
HTTP_METHOD,
AsyncReadableFile,
AsyncWritableFile,
Bytes,
BytesStream,
GetResult,
ListChunkType,
ListResult,
ListStream,
ReadableFile,
SignCapableStore,
WritableFile,
)
__version__: str = ___version()
2 changes: 1 addition & 1 deletion obstore/python/obstore/_buffered.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ from obspec._attributes import Attributes

from ._bytes import Bytes
from ._list import ObjectMeta
from .store import ObjectStore
from ._store import ObjectStore

if sys.version_info >= (3, 12):
from collections.abc import Buffer
Expand Down
2 changes: 1 addition & 1 deletion obstore/python/obstore/_copy.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .store import ObjectStore
from ._store import ObjectStore

def copy(store: ObjectStore, from_: str, to: str, *, overwrite: bool = True) -> None:
"""Copy an object from one path to another in the same object store.
Expand Down
2 changes: 1 addition & 1 deletion obstore/python/obstore/_delete.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections.abc import Sequence

from .store import ObjectStore
from ._store import ObjectStore

def delete(store: ObjectStore, paths: str | Sequence[str]) -> None:
"""Delete the object at the specified location(s).
Expand Down
2 changes: 1 addition & 1 deletion obstore/python/obstore/_get.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ from obspec._get import GetOptions

from ._bytes import Bytes
from ._list import ObjectMeta
from .store import ObjectStore
from ._store import ObjectStore

class GetResult:
"""Result for a get request.
Expand Down
79 changes: 7 additions & 72 deletions obstore/python/obstore/_list.pyi
Original file line number Diff line number Diff line change
@@ -1,77 +1,12 @@
# ruff: noqa: UP006
# ruff: noqa: UP035
# Use `list` instead of `List` for type annotation
# `typing.List` is deprecated, use `list` instead
# ruff: noqa: A001
# Variable `list` is shadowing a Python builtinRuff
# ruff: noqa: A001, UP006, UP035

from typing import Generic, List, Literal, Self, TypedDict, TypeVar, overload
from typing import List, Literal, overload

from arro3.core import RecordBatch, Table
from obspec._list import ListResult, ListStream
from obspec._meta import ObjectMeta

from .store import ObjectStore

ListChunkType = TypeVar("ListChunkType", List[ObjectMeta], RecordBatch, Table) # noqa: PYI001
"""The data structure used for holding list results.

By default, listing APIs return a `list` of [`ObjectMeta`][obspec.ObjectMeta]. However
for improved performance when listing large buckets, you can pass `return_arrow=True`.
Then an Arrow `RecordBatch` will be returned instead.

This implements [`obspec.ListChunkType_co`][], but is redefined here to specialize the
exact instance of the Arrow return type, given that in the obstore implementation, an
[`arro3.core.RecordBatch`][] or [`arro3.core.Table`][] will always be returned.
"""

class ListResult(TypedDict, Generic[ListChunkType]):
"""Result of a list call.

Includes objects, prefixes (directories) and a token for the next set of results.
Individual result sets may be limited to 1,000 objects based on the underlying
object storage's limitations.

This implements [`obspec.ListResult`][].
"""

common_prefixes: List[str]
"""Prefixes that are common (like directories)"""

objects: ListChunkType
"""Object metadata for the listing"""

class ListStream(Generic[ListChunkType]):
"""A stream of [ObjectMeta][obspec.ObjectMeta] that can be polled in a sync or
async fashion.

This implements [`obspec.ListStream`][].
""" # noqa: D205

def __aiter__(self) -> Self:
"""Return `Self` as an async iterator."""

def __iter__(self) -> Self:
"""Return `Self` as an async iterator."""

async def collect_async(self) -> ListChunkType:
"""Collect all remaining ObjectMeta objects in the stream.

This ignores the `chunk_size` parameter from the `list` call and collects all
remaining data into a single chunk.
"""

def collect(self) -> ListChunkType:
"""Collect all remaining ObjectMeta objects in the stream.

This ignores the `chunk_size` parameter from the `list` call and collects all
remaining data into a single chunk.
"""

async def __anext__(self) -> ListChunkType:
"""Return the next chunk of ObjectMeta in the stream."""

def __next__(self) -> ListChunkType:
"""Return the next chunk of ObjectMeta in the stream."""
from ._store import ObjectStore

@overload
def list(
Expand Down Expand Up @@ -163,7 +98,7 @@ def list(
!!! note
There is no async version of this method, because `list` is not async under the
hood, rather it only instantiates a stream, which can be polled in synchronous
or asynchronous fashion. See [`ListStream`][obstore.ListStream].
or asynchronous fashion. See [`ListStream`][obspec.ListStream].

Args:
store: The ObjectStore instance to use.
Expand All @@ -174,8 +109,8 @@ def list(
chunk_size: The number of items to collect per chunk in the returned
(async) iterator. All chunks except for the last one will have this many
items. This is ignored in the
[`collect`][obstore.ListStream.collect] and
[`collect_async`][obstore.ListStream.collect_async] methods of
[`collect`][obspec.ListStream.collect] and
[`collect_async`][obspec.ListStream.collect_async] methods of
`ListStream`.
return_arrow: If `True`, return each batch of list items as an Arrow
`RecordBatch`, not as a list of Python `dict`s. Arrow removes serialization
Expand Down
5 changes: 2 additions & 3 deletions obstore/python/obstore/_obstore.pyi
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from . import _store as _store
from ._buffered import AsyncReadableFile as AsyncReadableFile
from ._buffered import AsyncWritableFile as AsyncWritableFile
from ._buffered import ReadableFile as ReadableFile
Expand All @@ -21,16 +22,14 @@ from ._get import get_ranges as get_ranges
from ._get import get_ranges_async as get_ranges_async
from ._head import head as head
from ._head import head_async as head_async
from ._list import ListChunkType as ListChunkType
from ._list import ListResult as ListResult
from ._list import ListStream as ListStream
from ._list import list as list # noqa: A004
from ._list import list_with_delimiter as list_with_delimiter
from ._list import list_with_delimiter_async as list_with_delimiter_async
from ._put import put as put
from ._put import put_async as put_async
from ._rename import rename as rename
from ._rename import rename_async as rename_async
from ._scheme import parse_scheme as parse_scheme
from ._sign import HTTP_METHOD as HTTP_METHOD
from ._sign import SignCapableStore as SignCapableStore
from ._sign import sign as sign
Expand Down
2 changes: 1 addition & 1 deletion obstore/python/obstore/_rename.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .store import ObjectStore
from ._store import ObjectStore

def rename(store: ObjectStore, from_: str, to: str, *, overwrite: bool = True) -> None:
"""Move an object from one path to another in the same object store.
Expand Down
5 changes: 5 additions & 0 deletions obstore/python/obstore/_scheme.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from typing import Literal

def parse_scheme(
url: str,
) -> Literal["s3", "gcs", "http", "local", "memory", "azure"]: ...
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# TODO: move to reusable types package
from collections.abc import Callable
from pathlib import Path
from typing import Any, TypeAlias, Unpack, overload
from typing import Any, Self, TypeAlias, Unpack, overload

from ._aws import S3Config as S3Config
from ._aws import S3Credential as S3Credential
Expand Down Expand Up @@ -154,7 +154,7 @@ class LocalStore:
*,
automatic_cleanup: bool = False,
mkdir: bool = False,
) -> LocalStore:
) -> Self:
"""Construct a new LocalStore from a `file://` URL.

**Examples:**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
from collections.abc import Coroutine
from datetime import datetime
from typing import Any, Literal, NotRequired, Protocol, TypeAlias, TypedDict, Unpack
from typing import (
Any,
Literal,
NotRequired,
Protocol,
Self,
TypeAlias,
TypedDict,
Unpack,
)

from ._client import ClientConfig
from ._retry import RetryConfig
Expand Down Expand Up @@ -501,7 +510,7 @@ class S3Store:
retry_config: RetryConfig | None = None,
credential_provider: S3CredentialProvider | None = None,
**kwargs: Unpack[S3Config],
) -> S3Store:
) -> Self:
"""Parse available connection info from a well-known storage URL.

The supported url schemes are:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections.abc import Coroutine
from datetime import datetime
from typing import Any, Protocol, TypeAlias, TypedDict, Unpack
from typing import Any, Protocol, Self, TypeAlias, TypedDict, Unpack

from ._client import ClientConfig
from ._retry import RetryConfig
Expand Down Expand Up @@ -353,7 +353,7 @@ class AzureStore:
retry_config: RetryConfig | None = None,
credential_provider: AzureCredentialProvider | None = None,
**kwargs: Unpack[AzureConfig],
) -> AzureStore:
) -> Self:
"""Construct a new AzureStore with values populated from a well-known storage URL.

The supported url schemes are:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections.abc import Coroutine
from datetime import datetime
from typing import Any, Protocol, TypedDict, Unpack
from typing import Any, Protocol, Self, TypedDict, Unpack

from ._client import ClientConfig
from ._retry import RetryConfig
Expand Down Expand Up @@ -164,7 +164,7 @@ class GCSStore:
retry_config: RetryConfig | None = None,
credential_provider: GCSCredentialProvider | None = None,
**kwargs: Unpack[GCSConfig],
) -> GCSStore:
) -> Self:
"""Construct a new GCSStore with values populated from a well-known storage URL.

The supported url schemes are:
Expand Down
Loading
Loading