From 576f48bf89ebeddd8b09eb85a1d1967df2086a57 Mon Sep 17 00:00:00 2001 From: PhillipRDI Date: Fri, 21 Jun 2024 16:08:27 -0400 Subject: [PATCH 1/3] Exporting brush labels in both PNG and Numpy formats exports images and masks with base filenames that match images. Previously, the brush data in the exported ZIP file could not be used, because a mask's filename gave no way to determine which uploaded image the mask corresponded to. --- src/label_studio_sdk/converter/brush.py | 37 ++----- src/label_studio_sdk/converter/converter.py | 112 ++++++++++++++++++-- 2 files changed, 109 insertions(+), 40 deletions(-) diff --git a/src/label_studio_sdk/converter/brush.py b/src/label_studio_sdk/converter/brush.py index 68df2988f..ee1773622 100644 --- a/src/label_studio_sdk/converter/brush.py +++ b/src/label_studio_sdk/converter/brush.py @@ -99,7 +99,7 @@ def decode_rle(rle, print_params: bool = False): return out -def decode_from_annotation(from_name, results): +def decode_from_annotation(results): """from LS annotation to {"tag_name + label_name": [numpy uint8 image (width x height)]}""" layers = {} counters = defaultdict(int) @@ -116,7 +116,7 @@ def decode_from_annotation(from_name, results): width = result["original_width"] height = result["original_height"] labels = result[key] if key in result else ["no_label"] - name = from_name + "-" + "-".join(labels) + name = "".join(labels) # result count i = str(counters[name]) @@ -129,37 +129,17 @@ def decode_from_annotation(from_name, results): def save_brush_images_from_annotation( - task_id, - annotation_id, - completed_by, - from_name, + image_name, results, out_dir, out_format="numpy", ): - layers = decode_from_annotation(from_name, results) - if isinstance(completed_by, dict): - email = completed_by.get("email", "") - else: - email = str(completed_by) - email = "".join( - x for x in email if x.isalnum() or x == "@" or x == "." 
- ) # sanitize filename + layers = decode_from_annotation(results) + image_base = image_name.split('.')[0] for name in layers: sanitized_name = name.replace("/", "-").replace("\\", "-") - - filename = os.path.join( - out_dir, - "task-" - + str(task_id) - + "-annotation-" - + str(annotation_id) - + "-by-" - + email - + "-" - + sanitized_name, - ) + filename = os.path.join(out_dir,image_base+"-"+sanitized_name) image = layers[name] logger.debug(f"Save image to {filename}") if out_format == "numpy": @@ -175,10 +155,7 @@ def convert_task(item, out_dir, out_format="numpy"): """Task with multiple annotations to brush images, out_format = numpy | png""" for from_name, results in item["output"].items(): save_brush_images_from_annotation( - item["id"], - item["annotation_id"], - item["completed_by"], - from_name, + os.path.basename(item["input"]["image"]), results, out_dir, out_format, diff --git a/src/label_studio_sdk/converter/converter.py b/src/label_studio_sdk/converter/converter.py index edb7c5615..b4dd682cb 100644 --- a/src/label_studio_sdk/converter/converter.py +++ b/src/label_studio_sdk/converter/converter.py @@ -239,19 +239,27 @@ def convert(self, input_data, output_data, format, is_dir=True, **kwargs): input_data, output_data, output_image_dir=image_dir, is_dir=is_dir ) elif format == Format.BRUSH_TO_NUMPY: - items = ( - self.iter_from_dir(input_data) - if is_dir - else self.iter_from_json_file(input_data) + image_dir = kwargs.get("image_dir") + label_dir = kwargs.get("label_dir") + self.convert_to_brush( + input_data, + output_data, + output_image_dir=image_dir, + output_label_dir=label_dir, + is_dir=is_dir, + out_format="numpy", ) - brush.convert_task_dir(items, output_data, out_format="numpy") elif format == Format.BRUSH_TO_PNG: - items = ( - self.iter_from_dir(input_data) - if is_dir - else self.iter_from_json_file(input_data) + image_dir = kwargs.get("image_dir") + label_dir = kwargs.get("label_dir") + self.convert_to_brush( + input_data, + output_data, + 
output_image_dir=image_dir, + output_label_dir=label_dir, + is_dir=is_dir, + out_format="png", ) - brush.convert_task_dir(items, output_data, out_format="png") elif format == Format.ASR_MANIFEST: items = ( self.iter_from_dir(input_data) @@ -735,6 +743,90 @@ def add_image(images, width, height, image_id, image_path): indent=2, ) + def convert_to_brush( + self, + input_data, + output_dir, + output_image_dir=None, + output_label_dir=None, + is_dir=True, + out_format="png", + ): + """Convert data in a specific format to either PNG or Numpy format. + + Parameters + ---------- + input_data : str + The input data a directory. + output_dir : str + The directory to store the output files in. + output_image_dir : str, optional + The directory to store the image files in. If not provided, it will default to a subdirectory called 'images' in output_dir. + output_label_dir : str, optional + The directory to store the label files in. If not provided, it will default to a subdirectory called 'masks' in output_dir. + is_dir : bool, optional + A boolean indicating whether `input_data` is a directory (True) or a JSON file (False). + output_format : str, optional + A string either 'png' or 'numpy' indicating which mask format to use. 
+ """ + ensure_dir(output_dir) + if output_image_dir is not None: + ensure_dir(output_image_dir) + else: + output_image_dir = os.path.join(output_dir, "images") + os.makedirs(output_image_dir, exist_ok=True) + if output_label_dir is not None: + ensure_dir(output_label_dir) + else: + output_label_dir = os.path.join(output_dir, "masks") + os.makedirs(output_label_dir, exist_ok=True) + categories, category_name_to_id = self._get_labels() + data_key = self._data_keys[0] + + # Write all segmentation PNGs or Numpy masks + items = ( + self.iter_from_dir(input_data) + if is_dir + else self.iter_from_json_file(input_data) + ) + brush.convert_task_dir(items, output_label_dir, out_format) + + # Write all raw images to the "images" folder + item_iterator = ( + self.iter_from_dir(input_data) + if is_dir + else self.iter_from_json_file(input_data) + ) + for item_idx, item in enumerate(item_iterator): + # get image path(s) and label file path + image_paths = item["input"][data_key] + image_paths = [image_paths] if isinstance(image_paths, str) else image_paths + # download image(s) + image_path = None + # TODO: for multi-page annotation, this code won't produce correct relationships between page and annotated shapes + # fixing the issue in RND-84 + for image_path in reversed(image_paths): + if not os.path.exists(image_path): + try: + image_path = download( + image_path, + output_image_dir, + project_dir=self.project_dir, + return_relative_path=True, + upload_dir=self.upload_dir, + download_resources=self.download_resources, + ) + except: + logger.info( + "Unable to download {image_path}. 
The item {item} will be skipped".format( + image_path=image_path, item=item + ), + exc_info=True, + ) + if not image_path: + logger.error(f"No image path found for item #{item_idx}") + continue + def convert_to_yolo( self, input_data, From 48693c498759f4b1b7c6209200d4d81f460f9be1 Mon Sep 17 00:00:00 2001 From: PhillipRDI Date: Thu, 27 Jun 2024 12:03:38 -0400 Subject: [PATCH 2/3] feat: Export brush labels with matching file base name (updated) Added a fix to the brush label export feature so that filenames with multiple periods are properly exported. --- src/label_studio_sdk/converter/brush.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/label_studio_sdk/converter/brush.py b/src/label_studio_sdk/converter/brush.py index ee1773622..f78dec570 100644 --- a/src/label_studio_sdk/converter/brush.py +++ b/src/label_studio_sdk/converter/brush.py @@ -135,7 +135,7 @@ def save_brush_images_from_annotation( out_format="numpy", ): layers = decode_from_annotation(results) - image_base = image_name.split('.')[0] + image_base = ".".join(image_name.split('.')[0:-1]) for name in layers: sanitized_name = name.replace("/", "-").replace("\\", "-") From bd7bbba3b8ddb6295730aaae2261a2c28ba332ab Mon Sep 17 00:00:00 2001 From: Phillip Boone Date: Tue, 6 Aug 2024 14:15:57 -0400 Subject: [PATCH 3/3] Migrated convert_to_brush from converter.py to brush.py. 
--- src/label_studio_sdk/converter/brush.py | 88 ++++++++++++++++++++ src/label_studio_sdk/converter/converter.py | 90 +-------------------- 2 files changed, 92 insertions(+), 86 deletions(-) diff --git a/src/label_studio_sdk/converter/brush.py b/src/label_studio_sdk/converter/brush.py index f78dec570..e7d9cd454 100644 --- a/src/label_studio_sdk/converter/brush.py +++ b/src/label_studio_sdk/converter/brush.py @@ -37,6 +37,11 @@ from collections import defaultdict from itertools import groupby +from label_studio_sdk.converter.utils import ( + download, + ensure_dir, +) + logger = logging.getLogger(__name__) @@ -170,6 +175,89 @@ def convert_task_dir(items, out_dir, out_format="numpy"): # convert_task_dir('/ls/test/completions', '/ls/test/completions/output', 'numpy') +def convert_to_brush( + self, + input_data, + output_dir, + output_image_dir=None, + output_label_dir=None, + is_dir=True, + out_format="png", +): + """Convert data in a specific format to either PNG or Numpy format. + + Parameters + ---------- + input_data : str + The input data a directory. + output_dir : str + The directory to store the output files in. + output_image_dir : str, optional + The directory to store the image files in. If not provided, it will default to a subdirectory called 'images' in output_dir. + output_label_dir : str, optional + The directory to store the label files in. If not provided, it will default to a subdirectory called 'masks' in output_dir. + is_dir : bool, optional + A boolean indicating whether `input_data` is a directory (True) or a JSON file (False). + output_format : str, optional + A string either 'png' or 'numpy' indicating which mask format to use. 
+ """ + ensure_dir(output_dir) + if output_image_dir is not None: + ensure_dir(output_image_dir) + else: + output_image_dir = os.path.join(output_dir, "images") + os.makedirs(output_image_dir, exist_ok=True) + if output_label_dir is not None: + ensure_dir(output_label_dir) + else: + output_label_dir = os.path.join(output_dir, "masks") + os.makedirs(output_label_dir, exist_ok=True) + categories, category_name_to_id = self._get_labels() + data_key = self._data_keys[0] + + # Write all segmentation PNGs or Numpy masks + items = ( + self.iter_from_dir(input_data) + if is_dir + else self.iter_from_json_file(input_data) + ) + convert_task_dir(items, output_label_dir, out_format) + + # Write all raw images to the "images" folder + item_iterator = ( + self.iter_from_dir(input_data) + if is_dir + else self.iter_from_json_file(input_data) + ) + for item_idx, item in enumerate(item_iterator): + # get image path(s) and label file path + image_paths = item["input"][data_key] + image_paths = [image_paths] if isinstance(image_paths, str) else image_paths + # download image(s) + image_path = None + # TODO: for multi-page annotation, this code won't produce correct relationships between page and annotated shapes + # fixing the issue in RND-84 + for image_path in reversed(image_paths): + if not os.path.exists(image_path): + try: + image_path = download( + image_path, + output_image_dir, + project_dir=self.project_dir, + return_relative_path=True, + upload_dir=self.upload_dir, + download_resources=self.download_resources, + ) + except: + logger.info( + "Unable to download {image_path}. 
The item {item} will be skipped".format( + image_path=image_path, item=item + ), + exc_info=True, + ) + if not image_path: + logger.error(f"No image path found for item #{item_idx}") + continue ### Brush Import ### diff --git a/src/label_studio_sdk/converter/converter.py b/src/label_studio_sdk/converter/converter.py index b4dd682cb..b49a1d1a2 100644 --- a/src/label_studio_sdk/converter/converter.py +++ b/src/label_studio_sdk/converter/converter.py @@ -241,7 +241,8 @@ def convert(self, input_data, output_data, format, is_dir=True, **kwargs): elif format == Format.BRUSH_TO_NUMPY: image_dir = kwargs.get("image_dir") label_dir = kwargs.get("label_dir") - self.convert_to_brush( + brush.convert_to_brush( + self, input_data, output_data, output_image_dir=image_dir, @@ -252,7 +253,8 @@ def convert(self, input_data, output_data, format, is_dir=True, **kwargs): elif format == Format.BRUSH_TO_PNG: image_dir = kwargs.get("image_dir") label_dir = kwargs.get("label_dir") - self.convert_to_brush( + brush.convert_to_brush( + self, input_data, output_data, output_image_dir=image_dir, @@ -743,90 +745,6 @@ def add_image(images, width, height, image_id, image_path): indent=2, ) - def convert_to_brush( - self, - input_data, - output_dir, - output_image_dir=None, - output_label_dir=None, - is_dir=True, - out_format="png", - ): - """Convert data in a specific format to either PNG or Numpy format. - - Parameters - ---------- - input_data : str - The input data a directory. - output_dir : str - The directory to store the output files in. - output_image_dir : str, optional - The directory to store the image files in. If not provided, it will default to a subdirectory called 'images' in output_dir. - output_label_dir : str, optional - The directory to store the label files in. If not provided, it will default to a subdirectory called 'masks' in output_dir. - is_dir : bool, optional - A boolean indicating whether `input_data` is a directory (True) or a JSON file (False). 
- output_format : str, optional - A string either 'png' or 'numpy' indicating which mask format to use. - """ - ensure_dir(output_dir) - if output_image_dir is not None: - ensure_dir(output_image_dir) - else: - output_image_dir = os.path.join(output_dir, "images") - os.makedirs(output_image_dir, exist_ok=True) - if output_label_dir is not None: - ensure_dir(output_label_dir) - else: - output_label_dir = os.path.join(output_dir, "masks") - os.makedirs(output_label_dir, exist_ok=True) - categories, category_name_to_id = self._get_labels() - data_key = self._data_keys[0] - - # Write all segmentation PNGs or Numpy masks - items = ( - self.iter_from_dir(input_data) - if is_dir - else self.iter_from_json_file(input_data) - ) - brush.convert_task_dir(items, output_label_dir, out_format) - - # Write all raw images to the "images" folder - item_iterator = ( - self.iter_from_dir(input_data) - if is_dir - else self.iter_from_json_file(input_data) - ) - for item_idx, item in enumerate(item_iterator): - # get image path(s) and label file path - image_paths = item["input"][data_key] - image_paths = [image_paths] if isinstance(image_paths, str) else image_paths - # download image(s) - image_path = None - # TODO: for multi-page annotation, this code won't produce correct relationships between page and annotated shapes - # fixing the issue in RND-84 - for image_path in reversed(image_paths): - if not os.path.exists(image_path): - try: - image_path = download( - image_path, - output_image_dir, - project_dir=self.project_dir, - return_relative_path=True, - upload_dir=self.upload_dir, - download_resources=self.download_resources, - ) - except: - logger.info( - "Unable to download {image_path}. The item {item} will be skipped".format( - image_path=image_path, item=item - ), - exc_info=True, - ) - if not image_path: - logger.error(f"No image path found for item #{item_idx}") - continue - def convert_to_yolo( self, input_data,