Skip to content

Commit cb270e9

Browse files
committed
Add partition_update_enabled option
1 parent 1ea6c5d commit cb270e9

File tree

3 files changed

+60
-21
lines changed

3 files changed

+60
-21
lines changed

src/pypgstac/python/pypgstac/load.py

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,11 @@ def load_partition(
446446
f"Copying data for {partition} took {time.perf_counter() - t} seconds",
447447
)
448448

449-
def _partition_update(self, item: Dict[str, Any]) -> str:
449+
def _partition_update(
450+
self,
451+
item: Dict[str, Any],
452+
update_enabled: Optional[bool] = True,
453+
) -> str:
450454
"""Update the cached partition with the item information and return the name.
451455
452456
This method will mark the partition as dirty if the bounds of the partition
@@ -512,20 +516,24 @@ def _partition_update(self, item: Dict[str, Any]) -> str:
512516
partition = self._partition_cache[partition_name]
513517

514518
if partition:
515-
# Only update the partition if the item is outside the current bounds
516-
if item["datetime"] < partition.datetime_range_min:
517-
partition.datetime_range_min = item["datetime"]
518-
partition.requires_update = True
519-
if item["datetime"] > partition.datetime_range_max:
520-
partition.datetime_range_max = item["datetime"]
521-
partition.requires_update = True
522-
if item["end_datetime"] < partition.end_datetime_range_min:
523-
partition.end_datetime_range_min = item["end_datetime"]
524-
partition.requires_update = True
525-
if item["end_datetime"] > partition.end_datetime_range_max:
526-
partition.end_datetime_range_max = item["end_datetime"]
527-
partition.requires_update = True
519+
if update_enabled:
520+
# Only update the partition if the item is outside the current bounds
521+
if item["datetime"] < partition.datetime_range_min:
522+
partition.datetime_range_min = item["datetime"]
523+
partition.requires_update = True
524+
if item["datetime"] > partition.datetime_range_max:
525+
partition.datetime_range_max = item["datetime"]
526+
partition.requires_update = True
527+
if item["end_datetime"] < partition.end_datetime_range_min:
528+
partition.end_datetime_range_min = item["end_datetime"]
529+
partition.requires_update = True
530+
if item["end_datetime"] > partition.end_datetime_range_max:
531+
partition.end_datetime_range_max = item["end_datetime"]
532+
partition.requires_update = True
528533
else:
534+
if not update_enabled:
535+
raise Exception(f"Partition {partition_name} does not exist.")
536+
529537
# No partition exists yet; create a new one from item
530538
partition = Partition(
531539
name=partition_name,
@@ -541,7 +549,11 @@ def _partition_update(self, item: Dict[str, Any]) -> str:
541549

542550
return partition_name
543551

544-
def read_dehydrated(self, file: Union[Path, str] = "stdin") -> Generator:
552+
def read_dehydrated(
553+
self,
554+
file: Union[Path, str] = "stdin",
555+
partition_update_enabled: Optional[bool] = True,
556+
) -> Generator:
545557
if file is None:
546558
file = "stdin"
547559
if isinstance(file, str):
@@ -572,15 +584,21 @@ def read_dehydrated(self, file: Union[Path, str] = "stdin") -> Generator:
572584
item[field] = content_value
573585
else:
574586
item[field] = tab_split[i]
575-
item["partition"] = self._partition_update(item)
587+
item["partition"] = self._partition_update(
588+
item,
589+
partition_update_enabled,
590+
)
576591
yield item
577592

578593
def read_hydrated(
    self,
    file: Union[Path, str, Iterator[Any]] = "stdin",
    partition_update_enabled: Optional[bool] = True,
) -> Generator:
    """Yield formatted items read from hydrated JSON input.

    Every record produced by ``read_json(file)`` is run through
    ``self.format_item`` and then tagged with the partition name returned
    by ``self._partition_update``.

    Args:
        file: Source handed to ``read_json``; defaults to ``"stdin"``.
        partition_update_enabled: Forwarded unchanged to
            ``_partition_update`` as its second argument.

    Yields:
        The formatted item with its ``"partition"`` key set.
    """
    for record in read_json(file):
        formatted = self.format_item(record)
        partition_name = self._partition_update(
            formatted,
            partition_update_enabled,
        )
        formatted["partition"] = partition_name
        yield formatted
585603

586604
def load_items(
@@ -589,6 +607,7 @@ def load_items(
589607
insert_mode: Optional[Methods] = Methods.insert,
590608
dehydrated: Optional[bool] = False,
591609
chunksize: Optional[int] = 10000,
610+
partition_update_enabled: Optional[bool] = True,
592611
) -> None:
593612
"""Load items json records."""
594613
self.check_version()
@@ -599,9 +618,9 @@ def load_items(
599618
self._partition_cache = {}
600619

601620
if dehydrated and isinstance(file, str):
602-
items = self.read_dehydrated(file)
621+
items = self.read_dehydrated(file, partition_update_enabled)
603622
else:
604-
items = self.read_hydrated(file)
623+
items = self.read_hydrated(file, partition_update_enabled)
605624

606625
for chunkin in chunked_iterable(items, chunksize):
607626
chunk = list(chunkin)

src/pypgstac/python/pypgstac/pypgstac.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,16 @@ def load(
6363
method: Optional[Methods] = Methods.insert,
6464
dehydrated: Optional[bool] = False,
6565
chunksize: Optional[int] = 10000,
66+
partition_update_enabled: Optional[bool] = True,
6667
) -> None:
6768
"""Load collections or items into PGStac."""
6869
loader = Loader(db=self._db)
6970
if table == "collections":
7071
loader.load_collections(file, method)
7172
if table == "items":
72-
loader.load_items(file, method, dehydrated, chunksize)
73+
loader.load_items(
74+
file, method, dehydrated, chunksize, partition_update_enabled,
75+
)
7376

7477
def loadextensions(self) -> None:
7578
conn = self._db.connect()

src/pypgstac/tests/test_load.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,3 +441,20 @@ def test_load_items_nopartitionconstraint_succeeds(loader: Loader) -> None:
441441
str(TEST_ITEMS),
442442
insert_mode=Methods.upsert,
443443
)
444+
445+
446+
def test_load_items_when_partition_creation_disabled(loader: Loader) -> None:
    """Fail item loading when the partition is missing and updates are off.

    Collections are loaded first so the items have a parent collection; the
    items are then loaded with ``partition_update_enabled=False``, which must
    raise because no partition exists for them yet.
    """
    loader.load_collections(
        str(TEST_COLLECTIONS_JSON),
        insert_mode=Methods.insert,
    )
    # The loader reports a missing partition with a plain ``Exception``
    # ("Partition <name> does not exist."), which ``pytest.raises(ValueError)``
    # can never match. Asserting on the message pins the intended failure and
    # keeps passing should the loader later raise a narrower exception type.
    with pytest.raises(Exception, match=r"Partition .* does not exist"):
        loader.load_items(
            str(TEST_ITEMS),
            insert_mode=Methods.insert,
            partition_update_enabled=False,
        )

0 commit comments

Comments
 (0)