@@ -446,7 +446,11 @@ def load_partition(
446
446
f"Copying data for { partition } took { time .perf_counter () - t } seconds" ,
447
447
)
448
448
449
- def _partition_update (self , item : Dict [str , Any ]) -> str :
449
+ def _partition_update (
450
+ self ,
451
+ item : Dict [str , Any ],
452
+ update_enabled : Optional [bool ] = True ,
453
+ ) -> str :
450
454
"""Update the cached partition with the item information and return the name.
451
455
452
456
This method will mark the partition as dirty if the bounds of the partition
@@ -512,20 +516,24 @@ def _partition_update(self, item: Dict[str, Any]) -> str:
512
516
partition = self ._partition_cache [partition_name ]
513
517
514
518
if partition :
515
- # Only update the partition if the item is outside the current bounds
516
- if item ["datetime" ] < partition .datetime_range_min :
517
- partition .datetime_range_min = item ["datetime" ]
518
- partition .requires_update = True
519
- if item ["datetime" ] > partition .datetime_range_max :
520
- partition .datetime_range_max = item ["datetime" ]
521
- partition .requires_update = True
522
- if item ["end_datetime" ] < partition .end_datetime_range_min :
523
- partition .end_datetime_range_min = item ["end_datetime" ]
524
- partition .requires_update = True
525
- if item ["end_datetime" ] > partition .end_datetime_range_max :
526
- partition .end_datetime_range_max = item ["end_datetime" ]
527
- partition .requires_update = True
519
+ if update_enabled :
520
+ # Only update the partition if the item is outside the current bounds
521
+ if item ["datetime" ] < partition .datetime_range_min :
522
+ partition .datetime_range_min = item ["datetime" ]
523
+ partition .requires_update = True
524
+ if item ["datetime" ] > partition .datetime_range_max :
525
+ partition .datetime_range_max = item ["datetime" ]
526
+ partition .requires_update = True
527
+ if item ["end_datetime" ] < partition .end_datetime_range_min :
528
+ partition .end_datetime_range_min = item ["end_datetime" ]
529
+ partition .requires_update = True
530
+ if item ["end_datetime" ] > partition .end_datetime_range_max :
531
+ partition .end_datetime_range_max = item ["end_datetime" ]
532
+ partition .requires_update = True
528
533
else :
534
+ if not update_enabled :
535
+ raise Exception (f"Partition { partition_name } does not exist." )
536
+
529
537
# No partition exists yet; create a new one from item
530
538
partition = Partition (
531
539
name = partition_name ,
@@ -541,7 +549,11 @@ def _partition_update(self, item: Dict[str, Any]) -> str:
541
549
542
550
return partition_name
543
551
544
- def read_dehydrated (self , file : Union [Path , str ] = "stdin" ) -> Generator :
552
+ def read_dehydrated (
553
+ self ,
554
+ file : Union [Path , str ] = "stdin" ,
555
+ partition_update_enabled : Optional [bool ] = True ,
556
+ ) -> Generator :
545
557
if file is None :
546
558
file = "stdin"
547
559
if isinstance (file , str ):
@@ -572,15 +584,21 @@ def read_dehydrated(self, file: Union[Path, str] = "stdin") -> Generator:
572
584
item [field ] = content_value
573
585
else :
574
586
item [field ] = tab_split [i ]
575
- item ["partition" ] = self ._partition_update (item )
587
+ item ["partition" ] = self ._partition_update (
588
+ item ,
589
+ partition_update_enabled ,
590
+ )
576
591
yield item
577
592
578
593
def read_hydrated(
    self,
    file: Union[Path, str, Iterator[Any]] = "stdin",
    partition_update_enabled: Optional[bool] = True,
) -> Generator:
    """Yield formatted items from hydrated JSON input, tagged with a partition.

    Every record produced by ``read_json(file)`` is run through
    ``self.format_item`` and then handed to ``self._partition_update``; the
    partition name that call returns is stored under the item's
    ``"partition"`` key before the item is yielded.

    Args:
        file: Source handed unchanged to ``read_json``; defaults to
            ``"stdin"``.
        partition_update_enabled: Forwarded to ``_partition_update`` as
            ``update_enabled``. When falsy, ``_partition_update`` leaves the
            bounds of an existing cached partition untouched and raises
            instead of creating a partition it does not find.

    Yields:
        The formatted item dict with its ``"partition"`` entry filled in.

    Raises:
        Exception: Propagated from ``_partition_update`` when updates are
            disabled and no partition is found for the item.
    """
    for record in read_json(file):
        item = self.format_item(record)
        # Passed by keyword so the boolean's meaning is visible at the call.
        item["partition"] = self._partition_update(
            item,
            update_enabled=partition_update_enabled,
        )
        yield item
585
603
586
604
def load_items (
@@ -589,6 +607,7 @@ def load_items(
589
607
insert_mode : Optional [Methods ] = Methods .insert ,
590
608
dehydrated : Optional [bool ] = False ,
591
609
chunksize : Optional [int ] = 10000 ,
610
+ partition_update_enabled : Optional [bool ] = True ,
592
611
) -> None :
593
612
"""Load items json records."""
594
613
self .check_version ()
@@ -599,9 +618,9 @@ def load_items(
599
618
self ._partition_cache = {}
600
619
601
620
if dehydrated and isinstance (file , str ):
602
- items = self .read_dehydrated (file )
621
+ items = self .read_dehydrated (file , partition_update_enabled )
603
622
else :
604
- items = self .read_hydrated (file )
623
+ items = self .read_hydrated (file , partition_update_enabled )
605
624
606
625
for chunkin in chunked_iterable (items , chunksize ):
607
626
chunk = list (chunkin )
0 commit comments