diff --git a/docs/api/attributes.md b/docs/api/attributes.md deleted file mode 100644 index 66a14665..00000000 --- a/docs/api/attributes.md +++ /dev/null @@ -1,4 +0,0 @@ -# Attributes - -::: obstore.Attribute -::: obstore.Attributes diff --git a/docs/api/get.md b/docs/api/get.md index 13cb98b5..d8a27c15 100644 --- a/docs/api/get.md +++ b/docs/api/get.md @@ -6,9 +6,6 @@ ::: obstore.get_range_async ::: obstore.get_ranges ::: obstore.get_ranges_async -::: obstore.GetOptions ::: obstore.GetResult ::: obstore.BytesStream ::: obstore.Bytes -::: obstore.OffsetRange -::: obstore.SuffixRange diff --git a/docs/api/list.md b/docs/api/list.md index 92bdb0c0..64bafa8a 100644 --- a/docs/api/list.md +++ b/docs/api/list.md @@ -3,7 +3,6 @@ ::: obstore.list ::: obstore.list_with_delimiter ::: obstore.list_with_delimiter_async -::: obstore.ObjectMeta ::: obstore.ListResult ::: obstore.ListStream ::: obstore.ListChunkType diff --git a/docs/api/put.md b/docs/api/put.md index 93954b53..66c48df5 100644 --- a/docs/api/put.md +++ b/docs/api/put.md @@ -2,6 +2,3 @@ ::: obstore.put ::: obstore.put_async -::: obstore.PutResult -::: obstore.UpdateVersion -::: obstore.PutMode diff --git a/docs/blog/posts/obstore-0.4.md b/docs/blog/posts/obstore-0.4.md index 3bca5d27..3d6f11a3 100644 --- a/docs/blog/posts/obstore-0.4.md +++ b/docs/blog/posts/obstore-0.4.md @@ -72,7 +72,7 @@ Obstore version 0.5 is expected to improve on extensible credentials by enabling ## Return Arrow data from `list_with_delimiter` -By default, the [`obstore.list`][] and [`obstore.list_with_delimiter`][] APIs [return standard Python `dict`s][obstore.ObjectMeta]. However, if you're listing a large bucket, the overhead of materializing all those Python objects can become significant. +By default, the [`obstore.list`][] and [`obstore.list_with_delimiter`][] APIs [return standard Python `dict`s][obspec.ObjectMeta]. 
However, if you're listing a large bucket, the overhead of materializing all those Python objects can become significant. [`obstore.list`][] and [`obstore.list_with_delimiter`][] now both support a `return_arrow` keyword parameter. If set to `True`, an Arrow [`RecordBatch`][arro3.core.RecordBatch] or [`Table`][arro3.core.Table] will be returned, which is both faster and more memory efficient. diff --git a/mkdocs.yml b/mkdocs.yml index 4541b03d..60e6d7c0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -62,7 +62,6 @@ nav: - api/put.md - api/rename.md - api/sign.md - - api/attributes.md - api/exceptions.md - api/file.md - obstore.fsspec: api/fsspec.md @@ -157,6 +156,7 @@ plugins: - https://arrow.apache.org/docs/objects.inv - https://boto3.amazonaws.com/v1/documentation/api/latest/objects.inv - https://botocore.amazonaws.com/v1/documentation/api/latest/objects.inv + - https://developmentseed.org/obspec/latest/objects.inv - https://docs.aiohttp.org/en/stable/objects.inv - https://docs.pola.rs/api/python/stable/objects.inv - https://docs.python.org/3/objects.inv diff --git a/obstore/python/obstore/_attributes.pyi b/obstore/python/obstore/_attributes.pyi deleted file mode 100644 index 90ff251a..00000000 --- a/obstore/python/obstore/_attributes.pyi +++ /dev/null @@ -1,47 +0,0 @@ -from typing import Literal, TypeAlias - -Attribute: TypeAlias = ( - Literal[ - "Content-Disposition", - "Content-Encoding", - "Content-Language", - "Content-Type", - "Cache-Control", - ] - | str -) -"""Additional object attribute types. - -- `"Content-Disposition"`: Specifies how the object should be handled by a browser. - - See [Content-Disposition](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition). - -- `"Content-Encoding"`: Specifies the encodings applied to the object. - - See [Content-Encoding](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding). - -- `"Content-Language"`: Specifies the language of the object. 
- - See [Content-Language](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language). - -- `"Content-Type"`: Specifies the MIME type of the object. - - This takes precedence over any client configuration. - - See [Content-Type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type). - -- `"Cache-Control"`: Overrides cache control policy of the object. - - See [Cache-Control](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control). - -Any other string key specifies a user-defined metadata field for the object. -""" - -Attributes: TypeAlias = dict[Attribute, str] -"""Additional attributes of an object - -Attributes can be specified in [`put`][obstore.put]/[`put_async`][obstore.put_async] and -retrieved from [`get`][obstore.get]/[`get_async`][obstore.get_async]. - -Unlike ObjectMeta, Attributes are not returned by listing APIs -""" diff --git a/obstore/python/obstore/_buffered.pyi b/obstore/python/obstore/_buffered.pyi index 441face3..395f185a 100644 --- a/obstore/python/obstore/_buffered.pyi +++ b/obstore/python/obstore/_buffered.pyi @@ -2,7 +2,9 @@ import sys from contextlib import AbstractAsyncContextManager, AbstractContextManager from typing import Self -from ._attributes import Attributes +# TODO: fix import +from obspec._attributes import Attributes + from ._bytes import Bytes from ._list import ObjectMeta from .store import ObjectStore diff --git a/obstore/python/obstore/_get.pyi b/obstore/python/obstore/_get.pyi index d4214851..033e90ae 100644 --- a/obstore/python/obstore/_get.pyi +++ b/obstore/python/obstore/_get.pyi @@ -1,119 +1,13 @@ from collections.abc import Sequence -from datetime import datetime -from typing import TypedDict -from ._attributes import Attributes +# TODO: fix imports +from obspec._attributes import Attributes +from obspec._get import GetOptions + from ._bytes import Bytes from ._list import ObjectMeta from .store import ObjectStore -class OffsetRange(TypedDict): - """Request all bytes 
starting from a given byte offset.""" - - offset: int - """The byte offset for the offset range request.""" - -class SuffixRange(TypedDict): - """Request up to the last `n` bytes.""" - - suffix: int - """The number of bytes from the suffix to request.""" - -class GetOptions(TypedDict, total=False): - """Options for a get request. - - All options are optional. - """ - - if_match: str | None - """ - Request will succeed if the `ObjectMeta::e_tag` matches - otherwise returning [`PreconditionError`][obstore.exceptions.PreconditionError]. - - See - - Examples: - - ```text - If-Match: "xyzzy" - If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" - If-Match: * - ``` - """ - - if_none_match: str | None - """ - Request will succeed if the `ObjectMeta::e_tag` does not match - otherwise returning [`NotModifiedError`][obstore.exceptions.NotModifiedError]. - - See - - Examples: - - ```text - If-None-Match: "xyzzy" - If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" - If-None-Match: * - ``` - """ - - if_unmodified_since: datetime | None - """ - Request will succeed if the object has been modified since - - - """ - - if_modified_since: datetime | None - """ - Request will succeed if the object has not been modified since - otherwise returning [`PreconditionError`][obstore.exceptions.PreconditionError]. - - Some stores, such as S3, will only return `NotModified` for exact - timestamp matches, instead of for any timestamp greater than or equal. - - - """ - - range: tuple[int, int] | list[int] | OffsetRange | SuffixRange - """ - Request transfer of only the specified range of bytes - otherwise returning [`NotModifiedError`][obstore.exceptions.NotModifiedError]. - - The semantics of this tuple are: - - - `(int, int)`: Request a specific range of bytes `(start, end)`. - - If the given range is zero-length or starts after the end of the object, an - error will be returned. Additionally, if the range ends after the end of the - object, the entire remainder of the object will be returned. 
Otherwise, the - exact requested range will be returned. - - The `end` offset is _exclusive_. - - - `{"offset": int}`: Request all bytes starting from a given byte offset. - - This is equivalent to `bytes={int}-` as an HTTP header. - - - `{"suffix": int}`: Request the last `int` bytes. Note that here, `int` is _the - size of the request_, not the byte offset. This is equivalent to `bytes=-{int}` - as an HTTP header. - - - """ - - version: str | None - """ - Request a particular object version - """ - - head: bool - """ - Request transfer of no content - - - """ - class GetResult: """Result for a get request. @@ -142,6 +36,9 @@ class GetResult: Note that after calling `bytes`, `bytes_async`, or `stream`, you will no longer be able to call other methods on this object, such as the `meta` attribute. + + This implements [`obspec.GetResult`][], but is redefined here to specialize the + exact instance of the `bytes` return type to be [`obstore.Bytes`][]. """ @property @@ -229,6 +126,9 @@ class BytesStream: To fix this, set the `timeout` parameter in the [`client_options`][obstore.store.ClientConfig] passed when creating the store. + + This implements [`obspec.BufferStream`][], but is redefined here to specialize the + exact instance of the buffer return type to be [`obstore.Bytes`][]. 
""" def __aiter__(self) -> BytesStream: diff --git a/obstore/python/obstore/_head.pyi b/obstore/python/obstore/_head.pyi index eb96f0bc..16ad0f70 100644 --- a/obstore/python/obstore/_head.pyi +++ b/obstore/python/obstore/_head.pyi @@ -1,4 +1,6 @@ -from ._list import ObjectMeta +# TODO: fix import +from obspec._meta import ObjectMeta + from .store import ObjectStore def head(store: ObjectStore, path: str) -> ObjectMeta: diff --git a/obstore/python/obstore/_list.pyi b/obstore/python/obstore/_list.pyi index 8c07784e..cd0ce9b8 100644 --- a/obstore/python/obstore/_list.pyi +++ b/obstore/python/obstore/_list.pyi @@ -5,40 +5,23 @@ # ruff: noqa: A001 # Variable `list` is shadowing a Python builtinRuff -from datetime import datetime from typing import Generic, List, Literal, Self, TypedDict, TypeVar, overload from arro3.core import RecordBatch, Table +from obspec._meta import ObjectMeta from .store import ObjectStore -class ObjectMeta(TypedDict): - """The metadata that describes an object.""" - - path: str - """The full path to the object""" - - last_modified: datetime - """The last modified time""" - - size: int - """The size in bytes of the object""" - - e_tag: str | None - """The unique identifier for the object - - - """ - - version: str | None - """A version indicator for this object""" - ListChunkType = TypeVar("ListChunkType", List[ObjectMeta], RecordBatch, Table) # noqa: PYI001 """The data structure used for holding list results. -By default, listing APIs return a `list` of [`ObjectMeta`][obstore.ObjectMeta]. However +By default, listing APIs return a `list` of [`ObjectMeta`][obspec.ObjectMeta]. However for improved performance when listing large buckets, you can pass `return_arrow=True`. Then an Arrow `RecordBatch` will be returned instead. 
+ +This implements [`obspec.ListChunkType_co`][], but is redefined here to specialize the +exact instance of the Arrow return type, given that in the obstore implementation, an +[`arro3.core.RecordBatch`][] or [`arro3.core.Table`][] will always be returned. """ class ListResult(TypedDict, Generic[ListChunkType]): @@ -47,6 +30,8 @@ class ListResult(TypedDict, Generic[ListChunkType]): Includes objects, prefixes (directories) and a token for the next set of results. Individual result sets may be limited to 1,000 objects based on the underlying object storage's limitations. + + This implements [`obspec.ListResult`][]. """ common_prefixes: List[str] @@ -56,8 +41,10 @@ class ListResult(TypedDict, Generic[ListChunkType]): """Object metadata for the listing""" class ListStream(Generic[ListChunkType]): - """A stream of [ObjectMeta][obstore.ObjectMeta] that can be polled in a sync or + """A stream of [ObjectMeta][obspec.ObjectMeta] that can be polled in a sync or async fashion. + + This implements [`obspec.ListStream`][]. """ # noqa: D205 def __aiter__(self) -> Self: @@ -170,7 +157,7 @@ def list( ``` !!! note - The order of returned [`ObjectMeta`][obstore.ObjectMeta] is not + The order of returned [`ObjectMeta`][obspec.ObjectMeta] is not guaranteed !!! 
note diff --git a/obstore/python/obstore/_obstore.pyi b/obstore/python/obstore/_obstore.pyi index e800ef8a..71f16f6f 100644 --- a/obstore/python/obstore/_obstore.pyi +++ b/obstore/python/obstore/_obstore.pyi @@ -1,5 +1,3 @@ -from ._attributes import Attribute as Attribute -from ._attributes import Attributes as Attributes from ._buffered import AsyncReadableFile as AsyncReadableFile from ._buffered import AsyncWritableFile as AsyncWritableFile from ._buffered import ReadableFile as ReadableFile @@ -14,10 +12,7 @@ from ._copy import copy_async as copy_async from ._delete import delete as delete from ._delete import delete_async as delete_async from ._get import BytesStream as BytesStream -from ._get import GetOptions as GetOptions from ._get import GetResult as GetResult -from ._get import OffsetRange as OffsetRange -from ._get import SuffixRange as SuffixRange from ._get import get as get from ._get import get_async as get_async from ._get import get_range as get_range @@ -29,13 +24,9 @@ from ._head import head_async as head_async from ._list import ListChunkType as ListChunkType from ._list import ListResult as ListResult from ._list import ListStream as ListStream -from ._list import ObjectMeta as ObjectMeta from ._list import list as list # noqa: A004 from ._list import list_with_delimiter as list_with_delimiter from ._list import list_with_delimiter_async as list_with_delimiter_async -from ._put import PutMode as PutMode -from ._put import PutResult as PutResult -from ._put import UpdateVersion as UpdateVersion from ._put import put as put from ._put import put_async as put_async from ._rename import rename as rename diff --git a/obstore/python/obstore/_put.pyi b/obstore/python/obstore/_put.pyi index 3f35139c..4780007a 100644 --- a/obstore/python/obstore/_put.pyi +++ b/obstore/python/obstore/_put.pyi @@ -1,9 +1,12 @@ import sys from collections.abc import AsyncIterable, AsyncIterator, Iterable, Iterator from pathlib import Path -from typing import IO, Literal, 
TypeAlias, TypedDict +from typing import IO + +# TODO: Fix imports +from obspec._attributes import Attributes +from obspec._put import PutMode, PutResult -from ._attributes import Attributes from .store import ObjectStore if sys.version_info >= (3, 12): @@ -11,58 +14,6 @@ if sys.version_info >= (3, 12): else: from typing_extensions import Buffer -class UpdateVersion(TypedDict, total=False): - """Uniquely identifies a version of an object to update. - - Stores will use differing combinations of `e_tag` and `version` to provide - conditional updates, and it is therefore recommended applications preserve both - """ - - e_tag: str | None - """The unique identifier for the newly created object. - - - """ - - version: str | None - """A version indicator for the newly created object.""" - -PutMode: TypeAlias = Literal["create", "overwrite"] | UpdateVersion -"""Configure preconditions for the put operation - -There are three modes: - -- Overwrite: Perform an atomic write operation, overwriting any object present at the - provided path. -- Create: Perform an atomic write operation, returning - [`AlreadyExistsError`][obstore.exceptions.AlreadyExistsError] if an object already - exists at the provided path. -- Update: Perform an atomic write operation if the current version of the object matches - the provided [`UpdateVersion`][obstore.UpdateVersion], returning - [`PreconditionError`][obstore.exceptions.PreconditionError] otherwise. - -If a string is provided, it must be one of: - -- `"overwrite"` -- `"create"` - -If a `dict` is provided, it must meet the criteria of -[`UpdateVersion`][obstore.UpdateVersion]. -""" - -class PutResult(TypedDict): - """Result for a put request.""" - - e_tag: str | None - """ - The unique identifier for the newly created object - - - """ - - version: str | None - """A version indicator for the newly created object.""" - def put( store: ObjectStore, path: str, @@ -114,7 +65,7 @@ def put( protocol. 
Keyword Args: - mode: Configure the [`PutMode`][obstore.PutMode] for this operation. Refer to the [`PutMode`][obstore.PutMode] docstring for more information. + mode: Configure the [`PutMode`][obspec.PutMode] for this operation. Refer to the [`PutMode`][obspec.PutMode] docstring for more information. If this provided and is not `"overwrite"`, a non-multipart upload will be performed. Defaults to `"overwrite"`. attributes: Provide a set of `Attributes`. Defaults to `None`. diff --git a/obstore/python/obstore/fsspec.py b/obstore/python/obstore/fsspec.py index c1b1584d..fc525c90 100644 --- a/obstore/python/obstore/fsspec.py +++ b/obstore/python/obstore/fsspec.py @@ -50,7 +50,10 @@ if TYPE_CHECKING: from collections.abc import Coroutine, Iterable - from obstore import Attributes, Bytes, ReadableFile, WritableFile + # TODO: fix import + from obspec._attributes import Attributes + + from obstore import Bytes, ReadableFile, WritableFile from obstore.store import ( AzureConfig, ClientConfig, diff --git a/pyproject.toml b/pyproject.toml index c6eaa8c3..f496aa0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,9 +18,10 @@ dev-dependencies = [ "mike>=2.1.3", "mkdocs-material[imaging]>=9.6.3", "mkdocs>=1.6.1", - "mkdocstrings>=0.27.0", "mkdocstrings-python>=1.13.0", + "mkdocstrings>=0.27.0", "moto[s3,server]>=5.0.18", + "obspec>=0.1.0-beta.1", "pandas>=2.2.3", "pip>=24.2", "pyarrow>=17.0.0", diff --git a/uv.lock b/uv.lock index dd023484..93ad07cc 100644 --- a/uv.lock +++ b/uv.lock @@ -1173,6 +1173,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/9c/4fce9cf39dde2562584e4cfd351a0140240f82c0e3569ce25a250f47037d/numpy-2.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:bff7d8ec20f5f42607599f9994770fa65d76edca264a87b5e4ea5629bce12268", size = 12693107 }, ] +[[package]] +name = "obspec" +version = "0.1.0b1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/50/4a/ef272abf32b3d397727224987ed0fe4f55589c7e6ab608f2e62dc60d8be1/obspec-0.1.0b1.tar.gz", hash = "sha256:4d3bee05724efe27d6974b1766bcd65dcd32a2440a3bc27469f0c5f0425d82fc", size = 90417 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/58/7783b352fcdd95b50a8246e405281890bd9db14afc0d416cbc304bc42e04/obspec-0.1.0b1-py3-none-any.whl", hash = "sha256:6a1f4475dbe4a950996b49eba993bab6bef0f0d85847e49183a5f58a4f1dcc04", size = 14807 }, +] + [[package]] name = "openapi-schema-validator" version = "0.6.3" @@ -2008,6 +2017,7 @@ dev = [ { name = "mkdocstrings" }, { name = "mkdocstrings-python" }, { name = "moto", extra = ["s3", "server"] }, + { name = "obspec" }, { name = "pandas" }, { name = "pip" }, { name = "pyarrow" }, @@ -2034,6 +2044,7 @@ dev = [ { name = "mkdocstrings", specifier = ">=0.27.0" }, { name = "mkdocstrings-python", specifier = ">=1.13.0" }, { name = "moto", extras = ["s3", "server"], specifier = ">=5.0.18" }, + { name = "obspec", specifier = "==0.1.0b1" }, { name = "pandas", specifier = ">=2.2.3" }, { name = "pip", specifier = ">=24.2" }, { name = "pyarrow", specifier = ">=17.0.0" },