Skip to content

Commit e09523c

Browse files
committed
Implement glob
1 parent 1e3b241 commit e09523c

File tree

5 files changed

+205
-41
lines changed

5 files changed

+205
-41
lines changed

cloudpathlib/__init__.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from .azure.azblobclient import AzureBlobClient
66
from .azure.azblobpath import AzureBlobPath
77
from .cloudpath import CloudPath, implementation_registry
8-
from .patches import patch_open, patch_os_functions
8+
from .patches import patch_open, patch_os_functions, patch_glob
99
from .s3.s3client import S3Client
1010
from .gs.gspath import GSPath
1111
from .gs.gsclient import GSClient
@@ -29,6 +29,7 @@
2929
"implementation_registry",
3030
"GSClient",
3131
"GSPath",
32+
"patch_glob",
3233
"patch_open",
3334
"patch_os_functions",
3435
"S3Client",
@@ -38,3 +39,14 @@
3839

3940
# Opt-in global patching driven by environment variables.
#
# The patch_* helpers are context managers: calling one only builds an un-entered manager
# and patches nothing. Each requested patch is therefore entered on a module-level
# ExitStack, which also keeps the managers referenced so their cleanup (which restores the
# originals) does not run while the package is in use.
from contextlib import ExitStack  # only needed by the env-var patching below

_env_patches = ExitStack()

# CLOUDPATHLIB_PATCH_ALL implies every individual patch; each patch is applied at most
# once even when PATCH_ALL and a specific flag are both set.
_patch_all = bool(os.environ.get("CLOUDPATHLIB_PATCH_ALL", ""))

if _patch_all or bool(os.environ.get("CLOUDPATHLIB_PATCH_OPEN", "")):
    _env_patches.enter_context(patch_open())

if _patch_all or bool(os.environ.get("CLOUDPATHLIB_PATCH_OS", "")):
    _env_patches.enter_context(patch_os_functions())

if _patch_all or bool(os.environ.get("CLOUDPATHLIB_PATCH_GLOB", "")):
    _env_patches.enter_context(patch_glob())

cloudpathlib/client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,8 @@ def set_as_default_client(self) -> None:
109109
instances for this cloud without a client specified."""
110110
self.__class__._default_client = self
111111

112-
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath]) -> BoundedCloudPath:
113-
return self._cloud_meta.path_class(cloud_path=cloud_path, client=self) # type: ignore
112+
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath], *parts: str) -> BoundedCloudPath:
    """Create a path of this client's path class that is bound to this client.

    `cloud_path` and any additional `parts` are forwarded positionally to the path class;
    this client is passed as the `client` keyword argument.
    """
    path_cls = self._cloud_meta.path_class
    return path_cls(cloud_path, *parts, client=self)  # type: ignore
114114

115115
def clear_cache(self):
116116
"""Clears the contents of the cache folder.

cloudpathlib/cloudpath.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def _make_selector(pattern_parts, _flavour, case_sensitive=True):
6161
from .exceptions import (
6262
ClientMismatchError,
6363
CloudPathFileExistsError,
64+
CloudPathFileNotFoundError,
6465
CloudPathIsADirectoryError,
6566
CloudPathNotADirectoryError,
6667
CloudPathNotExistsError,
@@ -561,11 +562,18 @@ def open(
561562
force_overwrite_to_cloud: bool = False, # extra kwarg not in pathlib
562563
) -> IO[Any]:
563564
# if trying to call open on a directory that exists
564-
if self.exists() and not self.is_file():
565+
exists_on_cloud = self.exists()
566+
567+
if exists_on_cloud and not self.is_file():
565568
raise CloudPathIsADirectoryError(
566569
f"Cannot open directory, only files. Tried to open ({self})"
567570
)
568571

572+
if not exists_on_cloud and any(m in mode for m in ("r", "a")):
573+
raise CloudPathFileNotFoundError(
574+
f"File opened for read or append, but it does not exist on cloud: {self}"
575+
)
576+
569577
if mode == "x" and self.exists():
570578
raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")
571579

@@ -1102,7 +1110,7 @@ def _local(self) -> Path:
11021110
"""Cached local version of the file."""
11031111
return self.client._local_cache_dir / self._no_prefix
11041112

1105-
def _new_cloudpath(self, path: Union[str, os.PathLike]) -> Self:
1113+
def _new_cloudpath(self, path: Union[str, os.PathLike], *parts: str) -> Self:
11061114
"""Use the scheme, client, cache dir of this cloudpath to instantiate
11071115
a new cloudpath of the same type with the path passed.
11081116
@@ -1118,7 +1126,7 @@ def _new_cloudpath(self, path: Union[str, os.PathLike]) -> Self:
11181126
if not path.startswith(self.cloud_prefix):
11191127
path = f"{self.cloud_prefix}{path}"
11201128

1121-
return self.client.CloudPath(path)
1129+
return self.client.CloudPath(path, *parts)
11221130

11231131
def _refresh_cache(self, force_overwrite_from_cloud: bool = False) -> None:
11241132
try:

cloudpathlib/exceptions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ class CloudPathNotExistsError(CloudPathException):
2424
pass
2525

2626

27+
class CloudPathFileNotFoundError(CloudPathException, FileNotFoundError):
    """Cloud path error that is also a built-in ``FileNotFoundError``.

    Raised by ``CloudPath.open`` when a path that does not exist on the cloud is opened
    in a read or append mode.
    """

    pass
29+
30+
2731
class CloudPathIsADirectoryError(CloudPathException, IsADirectoryError):
    """Cloud path error that is also a built-in ``IsADirectoryError``.

    Raised by ``CloudPath.open`` when the path exists but is not a file.
    """

    pass
2933

@@ -77,3 +81,7 @@ class OverwriteNewerCloudError(CloudPathException):
7781

7882
class OverwriteNewerLocalError(CloudPathException):
    # NOTE(review): judging by the name, this guards against overwriting a newer local
    # file; no raise site is visible here -- confirm against callers.
    pass
84+
85+
86+
class InvalidGlobArgumentsError(CloudPathException):
    """Raised by the patched ``glob`` functions for an unusable ``pathname``/``root_dir`` pair.

    For example, when both are CloudPaths but ``root_dir`` is not a parent of ``pathname``,
    or when neither of them is a CloudPath.
    """

    pass

cloudpathlib/patches.py

Lines changed: 171 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import builtins
2+
from contextlib import contextmanager
3+
import glob
24
import os
35
import os.path
46

7+
from cloudpathlib.exceptions import InvalidGlobArgumentsError
8+
59
from .cloudpath import CloudPath
610

711

@@ -13,6 +17,10 @@ def _check_first_arg_first_index(*args, **kwargs):
1317
return isinstance(args[0][0], CloudPath)
1418

1519

20+
def _check_first_arg_or_root_dir(*args, **kwargs):
    """Return True when a glob-style call should be routed to the CloudPath version.

    That is the case when either the pathname (first positional argument, or the
    ``pathname`` keyword) or the ``root_dir`` keyword is a CloudPath.
    """
    # pathname may be passed by keyword, in which case there is no args[0] to inspect
    pathname = args[0] if args else kwargs.get("pathname")
    return isinstance(pathname, CloudPath) or isinstance(kwargs.get("root_dir"), CloudPath)
22+
23+
1624
def _patch_factory(original_version, cpl_version, cpl_check=_check_first_arg):
1725
_original = original_version
1826

@@ -26,14 +34,29 @@ def _patched_version(*args, **kwargs):
2634
return _patched_version
2735

2836

37+
@contextmanager
2938
def patch_open():
3039
patched = _patch_factory(
3140
builtins.open,
3241
CloudPath.open,
3342
)
43+
original_open = builtins.open
3444
builtins.open = patched
35-
CloudPath.__fspath__ = lambda x: x # turn off `fspath`
36-
return patched
45+
46+
original_fspath = CloudPath.__fspath__
47+
CloudPath.__fspath__ = (
48+
lambda x: x
49+
) # turn off `fspath` -> str since we patch everything to handle CloudPath
50+
51+
try:
52+
yield patched
53+
finally:
54+
builtins.open = original_open
55+
CloudPath.__fspath__ = original_fspath
56+
57+
58+
def _cloudpath_fspath(path):
    """Return `path` unchanged; registered as the CloudPath replacement for ``os.fspath``."""
    return path  # no op, since methods should all handle cloudpaths when patched
3760

3861

3962
def _cloudpath_os_listdir(path="."):
@@ -172,39 +195,152 @@ def _cloudpath_os_path_splitext(path):
172195
return str(path)[: -len(path.suffix)], path.suffix
173196

174197

198+
@contextmanager
175199
def patch_os_functions():
176-
os.listdir = _patch_factory(os.listdir, _cloudpath_os_listdir)
177-
os.lstat = _patch_factory(os.lstat, _cloudpath_lstat)
178-
os.mkdir = _patch_factory(os.mkdir, _cloudpath_mkdir)
179-
os.makedirs = _patch_factory(os.makedirs, _cloudpath_os_makedirs)
180-
os.remove = _patch_factory(os.remove, _cloudpath_os_remove)
181-
os.removedirs = _patch_factory(os.removedirs, _cloudpath_os_removedirs)
182-
os.rename = _patch_factory(os.rename, _cloudpath_os_rename)
183-
os.renames = _patch_factory(os.renames, _cloudpath_os_renames)
184-
os.replace = _patch_factory(os.replace, _cloudpath_os_replace)
185-
os.rmdir = _patch_factory(os.rmdir, _cloudpath_os_rmdir)
186-
os.scandir = _patch_factory(os.scandir, _cloudpath_os_scandir)
187-
os.stat = _patch_factory(os.stat, _cloudpath_os_stat)
188-
os.unlink = _patch_factory(os.unlink, _cloudpath_os_unlink)
189-
os.walk = _patch_factory(os.walk, _cloudpath_os_walk)
190-
191-
os.path.basename = _patch_factory(os.path.basename, _cloudpath_os_path_basename)
192-
os.path.commonpath = _patch_factory(
193-
os.path.commonpath, _cloudpath_os_path_commonpath, cpl_check=_check_first_arg_first_index
200+
os_level = [
201+
("fspath", os.fspath, _cloudpath_fspath),
202+
("listdir", os.listdir, _cloudpath_os_listdir),
203+
("lstat", os.lstat, _cloudpath_lstat),
204+
("mkdir", os.mkdir, _cloudpath_mkdir),
205+
("makedirs", os.makedirs, _cloudpath_os_makedirs),
206+
("remove", os.remove, _cloudpath_os_remove),
207+
("removedirs", os.removedirs, _cloudpath_os_removedirs),
208+
("rename", os.rename, _cloudpath_os_rename),
209+
("renames", os.renames, _cloudpath_os_renames),
210+
("replace", os.replace, _cloudpath_os_replace),
211+
("rmdir", os.rmdir, _cloudpath_os_rmdir),
212+
("scandir", os.scandir, _cloudpath_os_scandir),
213+
("stat", os.stat, _cloudpath_os_stat),
214+
("unlink", os.unlink, _cloudpath_os_unlink),
215+
("walk", os.walk, _cloudpath_os_walk),
216+
]
217+
218+
os_originals = {}
219+
220+
for name, original, cloud in os_level:
221+
os_originals[name] = original
222+
patched = _patch_factory(original, cloud)
223+
setattr(os, name, patched)
224+
225+
os_path_level = [
226+
("basename", os.path.basename, _cloudpath_os_path_basename, _check_first_arg),
227+
(
228+
"commonpath",
229+
os.path.commonpath,
230+
_cloudpath_os_path_commonpath,
231+
_check_first_arg_first_index,
232+
),
233+
(
234+
"commonprefix",
235+
os.path.commonprefix,
236+
_cloudpath_os_path_commonprefix,
237+
_check_first_arg_first_index,
238+
),
239+
("dirname", os.path.dirname, _cloudpath_os_path_dirname, _check_first_arg),
240+
("exists", os.path.exists, CloudPath.exists, _check_first_arg),
241+
("getatime", os.path.getatime, _cloudpath_os_path_getatime, _check_first_arg),
242+
("getmtime", os.path.getmtime, _cloudpath_os_path_getmtime, _check_first_arg),
243+
("getctime", os.path.getctime, _cloudpath_os_path_getctime, _check_first_arg),
244+
("getsize", os.path.getsize, _cloudpath_os_path_getsize, _check_first_arg),
245+
("isfile", os.path.isfile, CloudPath.is_file, _check_first_arg),
246+
("isdir", os.path.isdir, CloudPath.is_dir, _check_first_arg),
247+
("join", os.path.join, _cloudpath_os_path_join, _check_first_arg),
248+
("split", os.path.split, _cloudpath_os_path_split, _check_first_arg),
249+
("splitext", os.path.splitext, _cloudpath_os_path_splitext, _check_first_arg),
250+
]
251+
252+
os_path_originals = {}
253+
254+
for name, original, cloud, check in os_path_level:
255+
os_path_originals[name] = original
256+
patched = _patch_factory(original, cloud, cpl_check=check)
257+
setattr(os.path, name, patched)
258+
259+
try:
260+
yield
261+
finally:
262+
for name, original in os_originals.items():
263+
setattr(os, name, original)
264+
265+
for name, original in os_path_originals.items():
266+
setattr(os.path, name, original)
267+
268+
269+
def _get_root_dir_pattern_from_pathname(pathname):
    """Split a path that embeds a glob pattern into ``(root_dir, pattern)``.

    The path is cut at its first part containing a glob wildcard (``*``, ``?`` or ``[``).
    The parts before the cut become `root_dir`, built with ``pathname._new_cloudpath``;
    the remaining parts, joined with ``/``, become `pattern`. When no part contains a
    wildcard, the final part is used as the pattern, so the split is always defined.

    Args:
        pathname: path object exposing ``parts`` and ``_new_cloudpath``.

    Returns:
        Tuple of the root directory path and the pattern string relative to it.
    """
    parts = pathname.parts

    # index of the first wildcard part; fall back to the last part when there is none
    split_at = next(
        (i for i, part in enumerate(parts) if any(char in part for char in "*?[")),
        len(parts) - 1,
    )

    root_dir = pathname._new_cloudpath(*parts[:split_at])
    pattern = "/".join(parts[split_at:])

    return root_dir, pattern
281+
282+
283+
def _cloudpath_glob_iglob(
    pathname, *, root_dir=None, dir_fd=None, recursive=False, include_hidden=False
):
    """CloudPath counterpart of ``glob.iglob``.

    Works out a root directory and a pattern from `pathname` and `root_dir`, then
    delegates to ``root_dir.glob(pattern)``.

    Args:
        pathname: CloudPath embedding the pattern, or a plain pattern when `root_dir` is
            a CloudPath.
        root_dir: CloudPath to search from, or None when `pathname` is a CloudPath.
        dir_fd, recursive, include_hidden: accepted to mirror the ``glob.iglob``
            signature; not used by this implementation.

    Returns:
        Whatever ``root_dir.glob(pattern)`` returns.

    Raises:
        InvalidGlobArgumentsError: if both arguments are CloudPaths but `root_dir` is not
            a parent of `pathname`; if `pathname` is a CloudPath and `root_dir` is
            neither a CloudPath nor None; or if neither argument is a CloudPath.
    """
    pathname_is_cloud = isinstance(pathname, CloudPath)
    root_is_cloud = isinstance(root_dir, CloudPath)

    if pathname_is_cloud and root_is_cloud:
        # root_dir and pathname must share a parent, otherwise we don't know where to
        # start the pattern
        if not pathname.is_relative_to(root_dir):
            raise InvalidGlobArgumentsError(
                f"If both are CloudPaths, root_dir ({root_dir}) must be a parent of pathname ({pathname})."
            )

        # hand glob a pattern string rather than the path object relative_to returns
        pattern = str(pathname.relative_to(root_dir))

    elif pathname_is_cloud:
        if root_dir is not None:
            # previously this exception was constructed but never raised
            raise InvalidGlobArgumentsError(
                "If pathname is a CloudPath, root_dir must also be a CloudPath or None."
            )

        root_dir, pattern = _get_root_dir_pattern_from_pathname(pathname)

    elif root_is_cloud:
        pattern = pathname

    else:
        raise InvalidGlobArgumentsError(
            "At least one of pathname or root_dir must be a CloudPath."
        )

    return root_dir.glob(pattern)
314+
315+
316+
def _cloudpath_glob_glob(
    pathname, *, root_dir=None, dir_fd=None, recursive=False, include_hidden=False
):
    """CloudPath counterpart of ``glob.glob``: the ``_cloudpath_glob_iglob`` matches as a list.

    All arguments are passed through unchanged to ``_cloudpath_glob_iglob``.
    """
    matches = _cloudpath_glob_iglob(
        pathname,
        root_dir=root_dir,
        dir_fd=dir_fd,
        recursive=recursive,
        include_hidden=include_hidden,
    )
    return [*matches]
195-
os.path.commonprefix = _patch_factory(
196-
os.path.commonprefix,
197-
_cloudpath_os_path_commonprefix,
198-
cpl_check=_check_first_arg_first_index,
328+
329+
330+
@contextmanager
def patch_glob():
    """Temporarily make ``glob.glob`` and ``glob.iglob`` CloudPath-aware.

    Both functions are swapped for ``_patch_factory`` wrappers that use
    ``_check_first_arg_or_root_dir`` to decide when the cloud implementation applies.
    The originals are restored when the context exits, whether or not the body raised.
    """
    saved_glob = glob.glob
    glob.glob = _patch_factory(
        saved_glob, _cloudpath_glob_glob, cpl_check=_check_first_arg_or_root_dir
    )

    saved_iglob = glob.iglob
    glob.iglob = _patch_factory(
        saved_iglob, _cloudpath_glob_iglob, cpl_check=_check_first_arg_or_root_dir
    )

    try:
        yield
    finally:
        glob.glob, glob.iglob = saved_glob, saved_iglob

0 commit comments

Comments
 (0)