diff --git a/cloudpathlib/__init__.py b/cloudpathlib/__init__.py index da4fe28e..e11e1362 100644 --- a/cloudpathlib/__init__.py +++ b/cloudpathlib/__init__.py @@ -1,9 +1,11 @@ +import os import sys from .anypath import AnyPath from .azure.azblobclient import AzureBlobClient from .azure.azblobpath import AzureBlobPath from .cloudpath import CloudPath, implementation_registry +from .patches import patch_open, patch_os_functions, patch_glob from .s3.s3client import S3Client from .gs.gspath import GSPath from .gs.gsclient import GSClient @@ -27,6 +29,24 @@ "implementation_registry", "GSClient", "GSPath", + "patch_glob", + "patch_open", + "patch_os_functions", "S3Client", "S3Path", ] + + +if bool(os.environ.get("CLOUDPATHLIB_PATCH_OPEN", "")): + patch_open() + +if bool(os.environ.get("CLOUDPATHLIB_PATCH_OS", "")): + patch_os_functions() + +if bool(os.environ.get("CLOUDPATHLIB_PATCH_GLOB", "")): + patch_glob() + +if bool(os.environ.get("CLOUDPATHLIB_PATCH_ALL", "")): + patch_open() + patch_os_functions() + patch_glob diff --git a/cloudpathlib/client.py b/cloudpathlib/client.py index 1b6c32eb..9b3ddbec 100644 --- a/cloudpathlib/client.py +++ b/cloudpathlib/client.py @@ -109,8 +109,8 @@ def set_as_default_client(self) -> None: instances for this cloud without a client specified.""" self.__class__._default_client = self - def CloudPath(self, cloud_path: Union[str, BoundedCloudPath]) -> BoundedCloudPath: - return self._cloud_meta.path_class(cloud_path=cloud_path, client=self) # type: ignore + def CloudPath(self, cloud_path: Union[str, BoundedCloudPath], *parts: str) -> BoundedCloudPath: + return self._cloud_meta.path_class(cloud_path, *parts, client=self) # type: ignore def clear_cache(self): """Clears the contents of the cache folder. 
diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 8bad810d..36523f4d 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -61,6 +61,7 @@ def _make_selector(pattern_parts, _flavour, case_sensitive=True): from .exceptions import ( ClientMismatchError, CloudPathFileExistsError, + CloudPathFileNotFoundError, CloudPathIsADirectoryError, CloudPathNotADirectoryError, CloudPathNotExistsError, @@ -210,6 +211,7 @@ class CloudPath(metaclass=CloudPathMeta): def __init__( self, cloud_path: Union[str, Self, "CloudPath"], + *parts: str, client: Optional["Client"] = None, ) -> None: # handle if local file gets opened. must be set at the top of the method in case any code @@ -217,6 +219,13 @@ def __init__( self._handle: Optional[IO] = None self._client: Optional["Client"] = None + if parts: + # ensure first part ends in "/"; (sometimes it is just prefix, sometimes a longer path) + if not str(cloud_path).endswith("/"): + cloud_path = str(cloud_path) + "/" + + cloud_path = str(cloud_path) + "/".join(p.strip("/") for p in parts) + self.is_valid_cloudpath(cloud_path, raise_on_error=True) self._cloud_meta.validate_completeness() @@ -553,11 +562,18 @@ def open( force_overwrite_to_cloud: bool = False, # extra kwarg not in pathlib ) -> IO[Any]: # if trying to call open on a directory that exists - if self.exists() and not self.is_file(): + exists_on_cloud = self.exists() + + if exists_on_cloud and not self.is_file(): raise CloudPathIsADirectoryError( f"Cannot open directory, only files. 
Tried to open ({self})" ) + if not exists_on_cloud and any(m in mode for m in ("r", "a")): + raise CloudPathFileNotFoundError( + f"File opened for read or append, but it does not exist on cloud: {self}" + ) + if mode == "x" and self.exists(): raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.") @@ -1094,7 +1110,7 @@ def _local(self) -> Path: """Cached local version of the file.""" return self.client._local_cache_dir / self._no_prefix - def _new_cloudpath(self, path: Union[str, os.PathLike]) -> Self: + def _new_cloudpath(self, path: Union[str, os.PathLike], *parts: str) -> Self: """Use the scheme, client, cache dir of this cloudpath to instantiate a new cloudpath of the same type with the path passed. @@ -1110,7 +1126,7 @@ def _new_cloudpath(self, path: Union[str, os.PathLike]) -> Self: if not path.startswith(self.cloud_prefix): path = f"{self.cloud_prefix}{path}" - return self.client.CloudPath(path) + return self.client.CloudPath(path, *parts) def _refresh_cache(self, force_overwrite_from_cloud: bool = False) -> None: try: diff --git a/cloudpathlib/exceptions.py b/cloudpathlib/exceptions.py index 1b4499fb..a9f2ffb4 100644 --- a/cloudpathlib/exceptions.py +++ b/cloudpathlib/exceptions.py @@ -24,6 +24,10 @@ class CloudPathNotExistsError(CloudPathException): pass +class CloudPathFileNotFoundError(CloudPathException, FileNotFoundError): + pass + + class CloudPathIsADirectoryError(CloudPathException, IsADirectoryError): pass @@ -77,3 +81,7 @@ class OverwriteNewerCloudError(CloudPathException): class OverwriteNewerLocalError(CloudPathException): pass + + +class InvalidGlobArgumentsError(CloudPathException): + pass diff --git a/cloudpathlib/patches.py b/cloudpathlib/patches.py new file mode 100644 index 00000000..b20016f6 --- /dev/null +++ b/cloudpathlib/patches.py @@ -0,0 +1,346 @@ +import builtins +from contextlib import contextmanager +import glob +import os +import os.path + +from cloudpathlib.exceptions import InvalidGlobArgumentsError 
import functools

from .cloudpath import CloudPath
from .exceptions import CloudPathException

# Characters that make a path component a glob pattern instead of a literal
# name (the same set the standard-library ``glob`` module treats as special).
_GLOB_WILDCARDS = "*?["


def _check_first_arg(*args, **kwargs):
    """Return True when the first positional argument is a ``CloudPath``.

    Returns False (rather than raising) when no positional argument was
    passed, so calls such as ``os.listdir()`` fall through to the original
    implementation.
    """
    return bool(args) and isinstance(args[0], CloudPath)


def _check_first_arg_first_index(*args, **kwargs):
    """Return True when the first argument is a sequence starting with a ``CloudPath``.

    Used for functions such as ``os.path.commonpath`` that take a collection
    of paths. Empty or non-indexable collections yield False so that the
    original implementation handles (and reports errors for) them.
    """
    if not args:
        return False

    try:
        first = args[0][0]
    except (IndexError, KeyError, TypeError):
        return False

    return isinstance(first, CloudPath)


def _check_first_arg_or_root_dir(*args, **kwargs):
    """Return True when ``pathname`` or ``root_dir`` of a glob call is a ``CloudPath``.

    ``pathname`` may be given positionally or by keyword.
    """
    pathname = args[0] if args else kwargs.get("pathname")
    return isinstance(pathname, CloudPath) or isinstance(kwargs.get("root_dir"), CloudPath)


def _check_open_file_arg(*args, **kwargs):
    """Return True when the ``file`` argument of ``open`` is a ``CloudPath``.

    ``file`` may be given positionally or by keyword.
    """
    target = args[0] if args else kwargs.get("file")
    return isinstance(target, CloudPath)


def _patch_factory(original_version, cpl_version, cpl_check=_check_first_arg):
    """Build a dispatcher that routes ``CloudPath`` calls to ``cpl_version``.

    Args:
        original_version: The function being patched; called for all other input.
        cpl_version: The replacement to call when ``cpl_check`` returns True.
        cpl_check: Predicate called with the same ``*args, **kwargs`` as the
            patched function; decides which implementation handles the call.

    Returns:
        A wrapper carrying the metadata of ``original_version``.
    """

    @functools.wraps(original_version)
    def _patched_version(*args, **kwargs):
        if cpl_check(*args, **kwargs):
            return cpl_version(*args, **kwargs)
        return original_version(*args, **kwargs)

    return _patched_version


class _PatchHandle:
    """Handle for a group of attribute patches that are applied on creation.

    The patches take effect as soon as the handle is constructed, so calling a
    ``patch_*`` function on its own is enough to enable the patch. The handle
    is also a context manager: leaving the ``with`` block restores the
    original attributes. If ``result`` is callable, calling the handle
    delegates to it, which supports ``open = patch_open()``.
    """

    def __init__(self, replacements, result=None):
        """Apply ``replacements``, a list of ``(owner, attribute_name, new_value)``.

        ``result`` is the value returned from ``__enter__``.
        """
        replacements = list(replacements)

        # Record every original before replacing anything so restore() can
        # put the attributes back exactly as they were found.
        self._originals = [(owner, name, getattr(owner, name)) for owner, name, _ in replacements]
        self._result = result

        for owner, name, replacement in replacements:
            setattr(owner, name, replacement)

    def __enter__(self):
        return self._result

    def __exit__(self, exc_type, exc_value, traceback):
        self.restore()
        return False  # never swallow exceptions raised inside the with block

    def __call__(self, *args, **kwargs):
        if self._result is None:
            raise TypeError("This patch handle does not wrap a callable.")
        return self._result(*args, **kwargs)

    def restore(self):
        """Put back the original attributes; safe to call more than once."""
        while self._originals:
            owner, name, original = self._originals.pop()
            setattr(owner, name, original)


def _cloudpath_open(file, *args, **kwargs):
    """``open`` replacement: open through the path object itself."""
    return file.open(*args, **kwargs)


def patch_open():
    """Patch ``builtins.open`` so that it accepts ``CloudPath`` objects.

    The patch is applied immediately. Use the return value as a context
    manager to limit the patch to a ``with`` block; the value bound by
    ``as`` is the patched ``open`` function. The returned handle is also
    callable like ``open`` itself.

    While the patch is active ``CloudPath.__fspath__`` returns the path object
    unchanged instead of a string.
    """
    patched = _patch_factory(builtins.open, _cloudpath_open, cpl_check=_check_open_file_arg)

    return _PatchHandle(
        [
            (builtins, "open", patched),
            # turn off `fspath` -> str since we patch everything to handle CloudPath
            (CloudPath, "__fspath__", lambda path: path),
        ],
        result=patched,
    )


def _cloudpath_fspath(path):
    """``os.fspath`` replacement: no-op, patched functions handle ``CloudPath`` directly."""
    return path


def _cloudpath_os_listdir(path="."):
    """``os.listdir`` replacement: return entry names, as ``os.listdir`` does."""
    return [entry.name for entry in path.iterdir()]


def _cloudpath_lstat(path, *, dir_fd=None):
    """``os.lstat`` replacement; ``dir_fd`` is accepted and ignored."""
    return path.stat()


def _cloudpath_mkdir(path, mode=0o777, *, dir_fd=None):
    """``os.mkdir`` replacement; ``mode`` and ``dir_fd`` are accepted and ignored."""
    return path.mkdir()


def _cloudpath_os_makedirs(name, mode=0o777, exist_ok=False):
    """``os.makedirs`` replacement; ``mode`` is accepted and ignored."""
    # Call through the instance so the concrete path class's mkdir is used.
    return name.mkdir(parents=True, exist_ok=exist_ok)


def _cloudpath_os_remove(path, *, dir_fd=None):
    """``os.remove`` replacement; ``dir_fd`` is accepted and ignored."""
    return path.unlink()


def _prune_empty_parents(path):
    """Remove the parents of ``path`` from nearest to farthest, stopping at the first failure.

    Mirrors the best-effort pruning of ``os.removedirs`` / ``os.renames``.
    NOTE(review): assumes ``rmdir`` reports a non-empty directory with a
    ``CloudPathException`` or ``OSError`` subclass -- confirm against the
    ``rmdir`` implementation.
    """
    for parent in path.parents:
        try:
            parent.rmdir()
        except (CloudPathException, OSError):
            break


def _cloudpath_os_removedirs(name):
    """``os.removedirs`` replacement: remove the leaf, then prune empty parents."""
    name.rmdir()
    _prune_empty_parents(name)


def _cloudpath_os_rename(src, dst, *, src_dir_fd=None, dst_dir_fd=None):
    """``os.rename`` replacement; the ``*_dir_fd`` arguments are accepted and ignored."""
    return src.rename(dst)


def _cloudpath_os_renames(old, new):
    """``os.renames`` replacement: move the file, then prune emptied directories."""
    old.rename(new)  # move file
    _prune_empty_parents(old)  # remove previous directories if empty


def _cloudpath_os_replace(src, dst, *, src_dir_fd=None, dst_dir_fd=None):
    """``os.replace`` replacement; the ``*_dir_fd`` arguments are accepted and ignored."""
    return src.rename(dst)


def _cloudpath_os_rmdir(path, *, dir_fd=None):
    """``os.rmdir`` replacement; ``dir_fd`` is accepted and ignored."""
    return path.rmdir()


def _cloudpath_os_scandir(path="."):
    """``os.scandir`` replacement: iterate the path objects in the directory."""
    return path.iterdir()


def _cloudpath_os_stat(path, *, dir_fd=None, follow_symlinks=True):
    """``os.stat`` replacement; ``dir_fd`` and ``follow_symlinks`` are accepted and ignored."""
    return path.stat()


def _cloudpath_os_unlink(path, *, dir_fd=None):
    """``os.unlink`` replacement; ``dir_fd`` is accepted and ignored."""
    return path.unlink()


def _cloudpath_os_walk(top, topdown=True, onerror=None, followlinks=False):
    """``os.walk`` replacement yielding ``(dirpath, dirnames, filenames)``.

    ``dirpath`` is a path object; ``dirnames`` and ``filenames`` are lists of
    entry names, in the order ``os.walk`` uses. When ``topdown`` is true the
    caller may edit ``dirnames`` in place to prune the walk. A failure while
    listing a directory is passed to ``onerror`` if given and re-raised
    otherwise. ``followlinks`` is accepted and ignored.
    """
    dirnames, filenames = [], []

    try:
        for entry in top.iterdir():
            (dirnames if entry.is_dir() else filenames).append(entry.name)
    except Exception as error:  # listing may fail with backend-specific errors
        if onerror is None:
            raise
        onerror(error)
        return

    if topdown:
        yield top, dirnames, filenames

    # Iterate the same list object that was yielded so in-place pruning by the
    # consumer is honored in top-down mode.
    for dirname in dirnames:
        yield from _cloudpath_os_walk(
            top / dirname, topdown=topdown, onerror=onerror, followlinks=followlinks
        )

    if not topdown:
        yield top, dirnames, filenames


def _cloudpath_os_path_basename(path):
    """``os.path.basename`` replacement."""
    return path.name


def _common_leading(sequences):
    """Return the longest leading run shared by every sequence in ``sequences``.

    Works for any sliceable sequences (tuples of parts, strings). The result
    is a slice of the first sequence. ``sequences`` must not be empty.
    """
    first = sequences[0]
    length = 0

    # zip stops at the shortest sequence, which bounds the loop even when
    # only one sequence is given.
    for items in zip(*sequences):
        if any(item != items[0] for item in items[1:]):
            break
        length += 1

    return first[:length]


def _cloudpath_os_path_commonpath(paths):
    """``os.path.commonpath`` replacement: longest common sub-path as a path object.

    Raises:
        ValueError: If ``paths`` is empty or the paths share no leading part.
    """
    paths = list(paths)
    if not paths:
        raise ValueError("commonpath() arg is an empty sequence")

    common = _common_leading([p.parts for p in paths])
    if not common:
        raise ValueError("Paths don't have a common leading part")

    return paths[0].client.CloudPath(*common)


def _cloudpath_os_path_commonprefix(m):
    """``os.path.commonprefix`` replacement: character-wise common prefix as a string."""
    as_strings = [str(p) for p in m]
    if not as_strings:
        return ""
    return _common_leading(as_strings)


def _cloudpath_os_path_dirname(path):
    """``os.path.dirname`` replacement."""
    return path.parent


def _cloudpath_os_path_exists(path):
    """``os.path.exists`` replacement; dispatches through the path instance."""
    return path.exists()


def _cloudpath_os_path_isfile(path):
    """``os.path.isfile`` replacement; dispatches through the path instance."""
    return path.is_file()


def _cloudpath_os_path_isdir(path):
    """``os.path.isdir`` replacement; dispatches through the path instance."""
    return path.is_dir()


def _cloudpath_os_path_getatime(path):
    """``os.path.getatime`` replacement."""
    return path.stat().st_atime


def _cloudpath_os_path_getmtime(path):
    """``os.path.getmtime`` replacement."""
    return path.stat().st_mtime


def _cloudpath_os_path_getctime(path):
    """``os.path.getctime`` replacement."""
    return path.stat().st_ctime


def _cloudpath_os_path_getsize(path):
    """``os.path.getsize`` replacement."""
    return path.stat().st_size


def _cloudpath_os_path_join(path, *paths):
    """``os.path.join`` replacement: join with the ``/`` operator."""
    for p in paths:
        path /= p
    return path


def _cloudpath_os_path_split(path):
    """``os.path.split`` replacement: ``(parent, name)``."""
    return path.parent, path.name


def _cloudpath_os_path_splitext(path):
    """``os.path.splitext`` replacement: ``(root, suffix)`` as strings."""
    full, suffix = str(path), path.suffix
    # Slice by computed length: ``full[:-0]`` would be empty when there is no suffix.
    return full[: len(full) - len(suffix)], suffix


def patch_os_functions():
    """Patch functions in ``os`` and ``os.path`` so that they accept ``CloudPath`` objects.

    The patch is applied immediately. Use the return value as a context
    manager to limit the patch to a ``with`` block.
    """
    os_level = [
        ("fspath", _cloudpath_fspath),
        ("listdir", _cloudpath_os_listdir),
        ("lstat", _cloudpath_lstat),
        ("mkdir", _cloudpath_mkdir),
        ("makedirs", _cloudpath_os_makedirs),
        ("remove", _cloudpath_os_remove),
        ("removedirs", _cloudpath_os_removedirs),
        ("rename", _cloudpath_os_rename),
        ("renames", _cloudpath_os_renames),
        ("replace", _cloudpath_os_replace),
        ("rmdir", _cloudpath_os_rmdir),
        ("scandir", _cloudpath_os_scandir),
        ("stat", _cloudpath_os_stat),
        ("unlink", _cloudpath_os_unlink),
        ("walk", _cloudpath_os_walk),
    ]

    os_path_level = [
        ("basename", _cloudpath_os_path_basename, _check_first_arg),
        ("commonpath", _cloudpath_os_path_commonpath, _check_first_arg_first_index),
        ("commonprefix", _cloudpath_os_path_commonprefix, _check_first_arg_first_index),
        ("dirname", _cloudpath_os_path_dirname, _check_first_arg),
        ("exists", _cloudpath_os_path_exists, _check_first_arg),
        ("getatime", _cloudpath_os_path_getatime, _check_first_arg),
        ("getmtime", _cloudpath_os_path_getmtime, _check_first_arg),
        ("getctime", _cloudpath_os_path_getctime, _check_first_arg),
        ("getsize", _cloudpath_os_path_getsize, _check_first_arg),
        ("isfile", _cloudpath_os_path_isfile, _check_first_arg),
        ("isdir", _cloudpath_os_path_isdir, _check_first_arg),
        ("join", _cloudpath_os_path_join, _check_first_arg),
        ("split", _cloudpath_os_path_split, _check_first_arg),
        ("splitext", _cloudpath_os_path_splitext, _check_first_arg),
    ]

    # Build every wrapper before anything is replaced so each one captures
    # the function that was in place when patch_os_functions() was called.
    replacements = [
        (os, name, _patch_factory(getattr(os, name), cloud)) for name, cloud in os_level
    ]
    replacements.extend(
        (os.path, name, _patch_factory(getattr(os.path, name), cloud, cpl_check=check))
        for name, cloud, check in os_path_level
    )

    return _PatchHandle(replacements)


def _get_root_dir_pattern_from_pathname(pathname):
    """Split ``pathname`` into a literal root directory and a glob pattern.

    The split happens at the first component containing a wildcard. If no
    component contains one, the last component is used as the pattern so the
    glob matches only that path.

    Raises:
        InvalidGlobArgumentsError: If no literal component precedes the pattern.
    """
    parts = pathname.parts

    for index, part in enumerate(parts):
        if any(char in part for char in _GLOB_WILDCARDS):
            root_parts, pattern_parts = parts[:index], parts[index:]
            break
    else:
        root_parts, pattern_parts = parts[:-1], parts[-1:]

    if not root_parts:
        raise InvalidGlobArgumentsError(
            f"Cannot determine a root directory to glob from for pathname ({pathname})."
        )

    root_dir = pathname._new_cloudpath(*root_parts)
    pattern = "/".join(pattern_parts)

    return root_dir, pattern


def _cloudpath_glob_iglob(
    pathname, *, root_dir=None, dir_fd=None, recursive=False, include_hidden=False
):
    """``glob.iglob`` replacement: delegate to the ``glob`` method of a ``CloudPath``.

    ``dir_fd``, ``recursive`` and ``include_hidden`` are accepted and ignored.

    Raises:
        InvalidGlobArgumentsError: If neither argument is a ``CloudPath``, if
            ``pathname`` is a ``CloudPath`` but ``root_dir`` is some other
            non-None value, or if both are ``CloudPath`` objects and
            ``pathname`` is not located under ``root_dir``.
    """
    pathname_is_cloud = isinstance(pathname, CloudPath)
    root_is_cloud = isinstance(root_dir, CloudPath)

    if pathname_is_cloud and root_is_cloud:
        # if both are cloudpath, root_dir and pathname must share a parent, otherwise we don't
        # know where to start the pattern
        if not pathname.is_relative_to(root_dir):
            raise InvalidGlobArgumentsError(
                f"If both are CloudPaths, root_dir ({root_dir}) must be a parent of pathname ({pathname})."
            )
        # presumably relative_to returns a pure path object; glob is given a plain string
        pattern = str(pathname.relative_to(root_dir))

    elif pathname_is_cloud:
        if root_dir is not None:
            raise InvalidGlobArgumentsError(
                "If pathname is a CloudPath, root_dir must also be a CloudPath or None."
            )
        root_dir, pattern = _get_root_dir_pattern_from_pathname(pathname)

    elif root_is_cloud:
        pattern = pathname

    else:
        raise InvalidGlobArgumentsError(
            "At least one of pathname or root_dir must be a CloudPath."
        )

    return root_dir.glob(pattern)


def _cloudpath_glob_glob(
    pathname, *, root_dir=None, dir_fd=None, recursive=False, include_hidden=False
):
    """``glob.glob`` replacement: the results of ``_cloudpath_glob_iglob`` as a list."""
    return list(
        _cloudpath_glob_iglob(
            pathname,
            root_dir=root_dir,
            dir_fd=dir_fd,
            recursive=recursive,
            include_hidden=include_hidden,
        )
    )


def patch_glob():
    """Patch ``glob.glob`` and ``glob.iglob`` so that they accept ``CloudPath`` objects.

    The patch is applied immediately. Use the return value as a context
    manager to limit the patch to a ``with`` block.
    """
    return _PatchHandle(
        [
            (
                glob,
                "glob",
                _patch_factory(
                    glob.glob, _cloudpath_glob_glob, cpl_check=_check_first_arg_or_root_dir
                ),
            ),
            (
                glob,
                "iglob",
                _patch_factory(
                    glob.iglob, _cloudpath_glob_iglob, cpl_check=_check_first_arg_or_root_dir
                ),
            ),
        ]
    )
For anything else the code will throw an exception at some point.\n", + "\n", + "The long-term solution is to ask developers to implement their library to support either (1) pathlib-compatible objects for files and directories, or (2) file-like objects passed directly (e.g., so you could call `CloudPath.open` in your code and pass the file-like object to the library).\n", + "\n", + "The short-term workaround that will be compatible with some libraries is to patch the builtins to make `open`, `os`, `os.path`, and `glob` work with `CloudPath` objects. Because this overrides default Python functionality, this is not on by default. When patched, these functions will use the `CloudPath` version if they are passed a `CloudPath` and will fall back to their normal implementations otherwise.\n", + "\n", + "These methods can be enabled by setting the following environment variables:\n", + " - `CLOUDPATHLIB_PATCH_ALL=1` - patch all the builtins we implement: `open`, `os` functions, and `glob`\n", + " - `CLOUDPATHLIB_PATCH_OPEN=1` - patch the builtin `open` method\n", + " - `CLOUDPATHLIB_PATCH_OS=1` - patch the `os` functions\n", + " - `CLOUDPATHLIB_PATCH_GLOB=1` - patch the `glob` module\n", + "\n", + "You can set environment variables in many ways, but it is common to either pass it at the command line with something like `CLOUDPATHLIB_PATCH_ALL=1 python my_script.py` or to set it in your Python script with `os.environ['CLOUDPATHLIB_PATCH_ALL'] = '1'`. 
Note, these _must_ be set before any `cloudpathlib` methods are imported.\n", + "\n", + "Alternatively, you can call methods to patch the functions.\n", + "\n", + "```python\n", + "from cloudpathlib import patch_open, patch_os_functions, patch_glob\n", + "\n", + "# patch builtins\n", + "patch_open()\n", + "patch_os_functions()\n", + "patch_glob()\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These patch methods are all context managers, so if you want to control where the patch is active, you can use them in a `with` statement. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unpatched version fails:\n", + "'S3Path' object is not subscriptable\n", + "Patched succeeds:\n", + "[S3Path('s3://cloudpathlib-test-bucket/manual-tests/dirB/fileB'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/dirC/dirD'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/dirC/fileC'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/dirC/dirD/fileD'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/nested-dir/test.file'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/dirC/dirD/fileD'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/glob_test/dirB/fileB'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/glob_test/dirC/dirD'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/glob_test/dirC/fileC'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/glob_test/dirC/dirD/fileD'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/glob_test/dirC/dirD/fileD')]\n", + "`glob` module now is equivalent to `CloudPath.glob`\n", + "[S3Path('s3://cloudpathlib-test-bucket/manual-tests/dirB/fileB'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/dirC/dirD'), 
S3Path('s3://cloudpathlib-test-bucket/manual-tests/dirC/fileC'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/dirC/dirD/fileD'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/nested-dir/test.file'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/dirC/dirD/fileD'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/glob_test/dirB/fileB'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/glob_test/dirC/dirD'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/glob_test/dirC/fileC'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/glob_test/dirC/dirD/fileD'), S3Path('s3://cloudpathlib-test-bucket/manual-tests/glob_test/dirC/dirD/fileD')]\n" + ] + } + ], + "source": [ + "from glob import glob\n", + "\n", + "from cloudpathlib import patch_glob, CloudPath\n", + "\n", + "try:\n", + " glob(CloudPath(\"s3://cloudpathlib-test-bucket/manual-tests/**/*dir*/**\"))\n", + "except Exception as e:\n", + " print(\"Unpatched version fails:\")\n", + " print(e)\n", + "\n", + "\n", + "with patch_glob():\n", + " print(\"Patched succeeds:\")\n", + " print(glob(CloudPath(\"s3://cloudpathlib-test-bucket/manual-tests/**/*dir*/**/*\")))\n", + "\n", + " # or equivalently\n", + " print(\"`glob` module now is equivalent to `CloudPath.glob`\")\n", + " print(glob(\"**/*dir*/**/*\", root_dir=CloudPath(\"s3://cloudpathlib-test-bucket/manual-tests/\")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see a similar result for patching the functions in the `os` module." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n", + "Patched version of `os.path.isdir` returns: None\n" + ] + } + ], + "source": [ + "import os\n", + "\n", + "from cloudpathlib import patch_os_functions, CloudPath\n", + "\n", + "print(os.path.isdir(CloudPath(\"s3://cloudpathlib-test-bucket/manual-tests/\")))\n", + "\n", + "\n", + "# try:\n", + "# os.path.isdir(\"s3://cloudpathlib-test-bucket/manual-tests/\")\n", + "# except Exception as e:\n", + "# print(\"Unpatched version fails:\")\n", + "# print(e)\n", + "\n", + "\n", + "with patch_os_functions():\n", + " result = os.path.isdir(CloudPath(\"s3://cloudpathlib-test-bucket/manual-tests/\"))\n", + " print(\"Patched version of `os.path.isdir` returns: \", result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Patching `open`\n", + "\n", + "Sometimes code uses the Python built-in `open` to open files and operate on them. Because of the way that is implemented, it only accepts a string to operate on. Unfortunately, that breaks usage with cloudpathlib.\n", + "\n", + "Instead, we can patch the built-in `open` to handle all the normal circumstances, and—if the argument is a `CloudPath`—use cloudpathlib to do the opening.\n", + "\n", + "### Patching `open` in Jupyter notebooks\n", + "\n", + "Jupyter notebooks require one extra step because they have their own version of `open` that is injected into the global namespace of the notebook. This means that you must _additionally_ replace that version of open with the patched version if you want to use `open` in a notebook. 
This can be done with the `patch_open` method by adding the following to the top of the notebook.\n", + "\n", + "```python\n", + "from cloudpathlib import patch_open\n", + "\n", + "# replace jupyter's `open` with one that works with CloudPath\n", + "open = patch_open()\n", + "```\n", + "\n", + "Here's an example that doesn't work right now (for example, if you depend on a thrid-party library that calls `open`)." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Errno 2] No such file or directory: '/var/folders/sz/c8j64tx91mj0jb0vd1s4wj700000gn/T/tmpvnzs5qnd/cloudpathlib-test-bucket/patching_builtins/file.txt'\n" + ] + } + ], + "source": [ + "from cloudpathlib import CloudPath, patch_open\n", + "\n", + "\n", + "# example of a function within a third-party library\n", + "def library_function(filepath: str):\n", + " with open(filepath, \"r\") as f:\n", + " print(f.read())\n", + "\n", + "\n", + "# create file to read\n", + "cp = CloudPath(\"s3://cloudpathlib-test-bucket/patching_builtins/file.txt\")\n", + "\n", + "# fails with a TypeError if passed a CloudPath\n", + "try:\n", + " library_function(cp)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "ContextDecorator.__call__() takes 2 positional arguments but 3 were given", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[4], line 16\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# create file to read\u001b[39;00m\n\u001b[1;32m 14\u001b[0m cp \u001b[38;5;241m=\u001b[39m 
CloudPath(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124ms3://cloudpathlib-test-bucket/patching_builtins/file.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 16\u001b[0m \u001b[43mlibrary_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcp\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[4], line 9\u001b[0m, in \u001b[0;36mlibrary_function\u001b[0;34m(filepath)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlibrary_function\u001b[39m(filepath: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfilepath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(f\u001b[38;5;241m.\u001b[39mread())\n", + "\u001b[0;31mTypeError\u001b[0m: ContextDecorator.__call__() takes 2 positional arguments but 3 were given" + ] + } + ], + "source": [ + "from cloudpathlib import CloudPath, patch_open\n", + "\n", + "# jupyter patch\n", + "# open = patch_open()\n", + "\n", + "with patch_open():\n", + " # example of a function within a third-party library\n", + " def library_function(filepath: str):\n", + " with open(filepath, \"r\") as f:\n", + " print(f.read())\n", + "\n", + "\n", + " # create file to read\n", + " cp = CloudPath(\"s3://cloudpathlib-test-bucket/patching_builtins/file.txt\")\n", + "\n", + " library_function(cp)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> \u001b[0;32m/var/folders/sz/c8j64tx91mj0jb0vd1s4wj700000gn/T/ipykernel_34335/3906426398.py\u001b[0m(9)\u001b[0;36mlibrary_function\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m 7 \u001b[0;31m\u001b[0;31m# example of a function within a 
third-party library\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m 8 \u001b[0;31m\u001b[0;32mdef\u001b[0m \u001b[0mlibrary_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m----> 9 \u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m 10 \u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m 11 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\n", + "\n", + "*** TypeError: ContextDecorator.__call__() missing 1 required positional argument: 'func'\n" + ] + } + ], + "source": [ + "%debug" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# `open`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cloudpathlib", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test-open.py b/test-open.py new file mode 100644 index 00000000..47fa12a0 --- /dev/null +++ b/test-open.py @@ -0,0 +1,25 @@ +import os +from cloudpathlib import CloudPath, patch_open, patch_os_functions + + +def hello(cp): + with open(cp, "a") as f: + f.write(" written") + + +if __name__ == "__main__": + patch_open() + + cp = CloudPath("s3://cloudpathlib-test-bucket/manual/text_file.txt") + cp.write_text("yah") + + hello(cp) + + print(cp.read_text()) + cp.unlink() + + patch_os_functions() + + print(list(os.walk("."))) + print(list(cp.parent.client._list_dir(cp.parent, recursive=True))) + print(list(os.walk(cp.parent))) diff --git a/tests/test_patching.py b/tests/test_patching.py new file mode 100644 index 00000000..8eb467f0 --- /dev/null +++ b/tests/test_patching.py @@ -0,0 +1,49 @@ +import importlib +import os + +import pytest + +import cloudpathlib +from cloudpathlib import patch_open + + +def test_patch_open(rig): + cp = rig.create_cloud_path("dir_0/new_file.txt") + + with pytest.raises(FileNotFoundError): + with open(cp, "w") as f: + f.write("Hello!") + + # set via method call + with patch_open(): + with open(cp, "w") as f: + f.write("Hello!") + + assert cp.read_text() == "Hello!" + + # set via env var + cp2 = rig.create_cloud_path("dir_0/new_file_two.txt") + original_env_setting = os.environ.get("CLOUDPATHLIB_PATCH_OPEN", "") + + try: + os.environ["CLOUDPATHLIB_PATCH_OPEN"] = "1" + + importlib.reload(cloudpathlib) + + with open(cp2, "w") as f: + f.write("Hello!") + + assert cp2.read_text() == "Hello!" + + finally: + os.environ["CLOUDPATHLIB_PATCH_OPEN"] = original_env_setting + importlib.reload(cloudpathlib) + + # cp.write_text("Hello!") + + # # remove cache + # cp._local.unlink() + + +def test_patches(rig): + pass