diff --git a/Dockerfile.app b/Dockerfile.app
index 6d4c6433..231eb7e4 100644
--- a/Dockerfile.app
+++ b/Dockerfile.app
@@ -32,7 +32,8 @@ RUN --mount=type=cache,target="/var/cache/apt",sharing=locked \
     set -eux; \
     apt-get update; \
     apt-get upgrade -y; \
-    apt-get install --no-install-recommends -y procps; \
+    apt-get install --no-install-recommends -y \
+        procps tesseract-ocr tesseract-ocr-chi-sim tesseract-ocr-chi-tra tesseract-ocr-deu; \
     apt-get autoremove -y

 RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
diff --git a/adala/skills/__init__.py b/adala/skills/__init__.py
index 6cd7c3fa..65583f39 100644
--- a/adala/skills/__init__.py
+++ b/adala/skills/__init__.py
@@ -4,4 +4,5 @@ from .collection.rag import RAGSkill
 from .collection.ontology_creation import OntologyCreator, OntologyMerger
 from .collection.label_studio import LabelStudioSkill
+from .collection.label_studio_image_ocr import LabelStudioSkillImageOCR
 from ._base import Skill, TransformSkill, AnalysisSkill, SynthesisSkill
diff --git a/adala/skills/collection/label_studio_image_ocr.py b/adala/skills/collection/label_studio_image_ocr.py
new file mode 100644
index 00000000..1bde9aa3
--- /dev/null
+++ b/adala/skills/collection/label_studio_image_ocr.py
@@ -0,0 +1,679 @@
+import re
+import logging
+import pandas as pd
+from typing import List, Optional, Type, Dict, Tuple
+from functools import cached_property
+from copy import deepcopy
+from collections import defaultdict
+import aiohttp
+import base64
+import asyncio
+import io
+from thefuzz import fuzz
+from PIL import Image
+from urllib.parse import urlparse
+import uuid
+from adala.skills._base import TransformSkill
+from adala.runtimes import AsyncLiteLLMVisionRuntime
+from adala.runtimes._litellm import MessageChunkType
+from pydantic import BaseModel, Field, model_validator, computed_field
+from difflib import SequenceMatcher
+import numpy as np
+
+from adala.runtimes import Runtime, AsyncRuntime
+from adala.utils.internal_data import InternalDataFrame
+
+from label_studio_sdk.label_interface import LabelInterface
+from label_studio_sdk.label_interface.control_tags import ControlTag, ObjectTag
+from label_studio_sdk._extensions.label_studio_tools.core.utils.json_schema import (
+    json_schema_to_pydantic,
+)
+from .match_bbox_by_text import find_text_in_image
+
+
+logger = logging.getLogger(__name__)
+
+
+def extract_variable_name(input_string):
+    """Extract variable names specified as $<name> in the input string."""
+    pattern = r"\$([a-zA-Z0-9_]+)"
+    matches = re.findall(pattern, input_string)
+    return matches
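
A quick illustration of what this helper extracts (the tag markup below is a hypothetical example; the function only cares about the `$variable` syntax):

    >>> extract_variable_name("<Image name='image' value='$image_url'/>")
    ['image_url']
    >>> extract_variable_name("no variables here")
    []
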
+
+
+class LabelStudioSkillImageOCR(TransformSkill):
+
+    name: str = "label_studio"
+    input_template: str = "Annotate the input data according to the provided schema."
+    # TODO: remove output_template, fix calling @model_validator(mode='after') in the base class
+    output_template: str = "Output: {field_name}"
+    response_model: Type[BaseModel] = (
+        BaseModel  # why is validate_response_model called in the base class?
+    )
+    # ------------------------------
+    label_config: str = ""
+    allowed_control_tags: Optional[list[str]] = None
+    allowed_object_tags: Optional[list[str]] = None
+
+    # TODO: implement postprocessing to verify Taxonomy
+
+    @cached_property
+    def label_interface(self) -> LabelInterface:
+        return LabelInterface(self.label_config)
+
+    @cached_property
+    def image_tags(self) -> List[ObjectTag]:
+        # check if any image tags are used as input variables
+        object_tag_names = self.allowed_object_tags or list(
+            self.label_interface._objects.keys()
+        )
+        tags = []
+        for tag_name in object_tag_names:
+            tag = self.label_interface.get_object(tag_name)
+            if tag.tag.lower() == "image":
+                tags.append(tag)
+        return tags
+
+    def __getstate__(self):
+        """Exclude cached properties when pickling - otherwise the 'Agent' cannot be serialized in Celery"""
+        state = deepcopy(super().__getstate__())
+        # Remove cached_property values
+        for key in ["label_interface", "ner_tags", "image_tags"]:
+            state["__dict__"].pop(key, None)
+        return state
+
+    @model_validator(mode="after")
+    def validate_response_model(self):
+
+        logger.debug(f"Read labeling config {self.label_config}")
+
+        if self.allowed_control_tags or self.allowed_object_tags:
+            if self.allowed_control_tags:
+                control_tags = {
+                    tag: self.label_interface._controls[tag]
+                    for tag in self.allowed_control_tags
+                }
+            else:
+                control_tags = self.label_interface._controls
+            if self.allowed_object_tags:
+                object_tags = {
+                    tag: self.label_interface._objects[tag]
+                    for tag in self.allowed_object_tags
+                }
+            else:
+                object_tags = self.label_interface._objects
+            interface = LabelInterface.create_instance(
+                tags={**control_tags, **object_tags}
+            )
+            logger.debug(
+                f"Filtered labeling config based on allowed tags {self.allowed_control_tags=} and {self.allowed_object_tags=} to {interface.config}"
+            )
+        else:
+            interface = self.label_interface
+
+        # NOTE: the filtered label config is used for the response model, but the full label config is used for the prompt, so that the model has as much context as possible.
+        self.field_schema = interface.to_json_schema()
+        logger.debug(f"Converted labeling config to json schema: {self.field_schema}")
+
+        return self
+
+    def _create_response_model_from_field_schema(self):
+        pass
+
+    def apply(
+        self,
+        input: InternalDataFrame,
+        runtime: Runtime,
+    ) -> InternalDataFrame:
+
+        with json_schema_to_pydantic(self.field_schema) as ResponseModel:
+            return runtime.batch_to_batch(
+                input,
+                input_template=self.input_template,
+                output_template="",
+                instructions_template=self.instructions,
+                response_model=ResponseModel,
+            )
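
For context on `apply`: `json_schema_to_pydantic` yields a pydantic response model built from the labeling-config JSON schema. A rough standalone sketch of the same idea, using plain `pydantic.create_model` and a hypothetical single-field schema:

    from pydantic import create_model

    # simplified stand-in for the schema produced by interface.to_json_schema()
    ResponseModel = create_model("ResponseModel", sentiment=(str, ...))
    print(ResponseModel(sentiment="positive"))  # sentiment='positive'
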
+
+    @classmethod
+    async def process_images_with_ocr(cls, images: list) -> list:
+        """
+        Process a list of images with OCR by calling the OCR service.
+
+        Args:
+            images: List of image data (URLs or base64 strings)
+
+        Returns:
+            List of OCR results for each image
+        """
+
+        async def process_single_image(image_data):
+            # Check if the image is a URL
+            is_url = False
+            try:
+                parsed = urlparse(image_data)
+                is_url = all([parsed.scheme, parsed.netloc])
+            except Exception:
+                is_url = False
+
+            if not is_url:
+                logger.warning("Image data is not a URL. OCR service requires URLs or base64 data.")
+                return None
+
+            # Download the image and convert to base64
+            async with aiohttp.ClientSession() as session:
+                try:
+                    async with session.get(image_data) as response:
+                        if response.status == 200:
+                            image_bytes = await response.read()
+                            # Get image dimensions
+                            image = Image.open(io.BytesIO(image_bytes))
+                            width, height = image.size
+                            # Convert to base64
+                            base64_data = base64.b64encode(image_bytes).decode('utf-8')
+                        else:
+                            error_text = await response.text()
+                            logger.error(f"Failed to download image: {response.status}, {error_text}")
+                            return None
+                except Exception as e:
+                    logger.error(f"Error downloading image: {str(e)}")
+                    return None
+
+            # Call the OCR service with base64 data
+            ocr_url = "https://llm-ocr-server.appx.humansignal.com/ocr/base64"
+
+            # Prepare multipart form data for the OCR request
+            form_data = aiohttp.FormData()
+            form_data.add_field('image_data', base64_data)
+            form_data.add_field('confidence_threshold', str(0.3))
+            form_data.add_field('languages', 'en,ch_sim')
+
+            async with aiohttp.ClientSession() as session:
+                try:
+                    async with session.post(ocr_url, data=form_data) as response:
+                        if response.status == 200:
+                            json_response = await response.json()
+                            return {
+                                "ocr_data": json_response,
+                                "image_width": width,
+                                "image_height": height
+                            }
+                        else:
+                            error_text = await response.text()
+                            logger.error(f"OCR service returned error: {response.status}, {error_text}")
+                            return None
+                except Exception as e:
+                    logger.error(f"Error calling OCR service: {str(e)}")
+                    return None
+
+        # Process all images concurrently
+        tasks = [process_single_image(image) for image in images]
+        results = await asyncio.gather(*tasks)
+
+        return results
+
+    def _get_normalized_bbox(self, bbox: List[List[int]], original_width: int, original_height: int) -> Dict[str, float]:
+        # Calculate top-left corner (minimum x and y)
+        # bbox format is [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]
+        x_values = [point[0] for point in bbox]
+        y_values = [point[1] for point in bbox]
+
+        min_x = min(x_values)
+        min_y = min(y_values)
+
+        # Calculate width and height
+        max_x = max(x_values)
+        max_y = max(y_values)
+        width = max_x - min_x
+        height = max_y - min_y
+
+        # Convert to percentages
+        x_percent = (min_x / original_width) * 100
+        y_percent = (min_y / original_height) * 100
+        width_percent = (width / original_width) * 100
+        height_percent = (height / original_height) * 100
+
+        return {
+            'x': x_percent,
+            'y': y_percent,
+            'width': width_percent,
+            'height': height_percent
+        }
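
A worked example of the normalization above, assuming a 1000x500 px image:

    bbox = [[100, 50], [300, 50], [300, 100], [100, 100]]
    # min_x=100, min_y=50, width=200, height=50, so the result is
    # {'x': 10.0, 'y': 10.0, 'width': 20.0, 'height': 10.0}  (all percentages)
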
+
+    def _convert_ocr_results_to_label_studio_format(self, results: list) -> Tuple[List, List]:
+
+        # normalize EasyOCR results to the RectangleLabels bounding box format of Label Studio
+        all_bbox_annotations = []
+        all_text_annotations = []
+        for result in results:
+            if not result:
+                continue
+
+            # Extract OCR response data
+            bboxes = result.get('ocr_data', {}).get('bboxes', [])
+            texts = result.get('ocr_data', {}).get('texts', [])
+            scores = result.get('ocr_data', {}).get('scores', [])
+            original_width = result.get('image_width', 1000)
+            original_height = result.get('image_height', 1000)
+
+            # Convert to Label Studio format
+            bbox_annotations = []
+            text_annotations = []
+
+            for i, (bbox, text, score) in enumerate(zip(bboxes, texts, scores)):
+                # EasyOCR bbox format is [[x1,y1],[x2,y2],[x3,y3],[x4,y4]];
+                # convert to Label Studio format with x,y as the top-left corner,
+                # with x, y, width and height expressed as percentages
+
+                bbox_annotation = self._get_normalized_bbox(bbox, original_width, original_height)
+
+                # generate a unique id for the annotation
+                id_gen = str(uuid.uuid4())[:8]
+                # Create Label Studio format annotation
+                bbox_annotation['id'] = id_gen
+                bbox_annotation['rotation'] = 0
+
+                text_annotation = {
+                    'text': [text],
+                    'id': id_gen
+                }
+
+                bbox_annotations.append(bbox_annotation)
+                text_annotations.append(text_annotation)
+
+            # Replace the OCR result with the Label Studio formatted result
+            all_bbox_annotations.append(bbox_annotations)
+            all_text_annotations.append(text_annotations)
+
+        return all_bbox_annotations, all_text_annotations
+
+    def _convert_ocr_results_to_label_studio_format_v2(self, results: list) -> Tuple[List, List]:
+        """
+        Same as _convert_ocr_results_to_label_studio_format, but expects pre-grouped OCR results:
+        result['ocr_data'] is a dictionary mapping each reference text to a list of OCR matches.
+        For each reference text we create one group of `bbox_annotations` and `text_annotations`,
+        using the id of the group's first annotation as the parent_id of the remaining ones.
+        """
+        all_bbox_annotations = []
+        all_text_annotations = []
+
+        for result in results:
+            bbox_annotations = []
+            text_annotations = []
+
+            original_width = result['image_width']
+            original_height = result['image_height']
+
+            # Process each reference text and its associated OCR results
+            for reference_text, ocr_matches in result['ocr_data'].items():
+                # Create a group for this reference text
+                group_id = None
+
+                # Process each OCR match for this reference text
+                for bbox, text, score in zip(ocr_matches['bboxes'], ocr_matches['texts'], ocr_matches['scores']):
+
+                    bbox_annotation = self._get_normalized_bbox(bbox, original_width, original_height)
+
+                    # Generate a unique id for the annotation
+                    id_gen = str(uuid.uuid4())[:8]
+                    if group_id is None:
+                        group_id = id_gen
+
+                    # Create bbox annotation
+                    bbox_annotation['rotation'] = 0
+                    bbox_annotation['id'] = id_gen
+                    bbox_annotation['score'] = score
+                    # Create text annotation
+                    text_annotation = {
+                        'text': [text],
+                        'id': id_gen,
+                    }
+                    if group_id != id_gen:
+                        text_annotation['parent_id'] = group_id
+                        bbox_annotation['parent_id'] = group_id
+
+                    bbox_annotations.append(bbox_annotation)
+                    text_annotations.append(text_annotation)
+
+            # Add annotations for this result to the overall lists
+            all_bbox_annotations.append(bbox_annotations)
+            all_text_annotations.append(text_annotations)
+
+        return all_bbox_annotations, all_text_annotations
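
The grouped output of the v2 conversion looks roughly like this (ids and values are illustrative): the first region of a group owns the group id, and the remaining regions point back to it via parent_id:

    bbox_annotations = [
        {'x': 10.0, 'y': 12.5, 'width': 20.0, 'height': 4.0, 'rotation': 0, 'id': 'a1b2c3d4', 'score': 0.91},
        {'x': 31.0, 'y': 12.4, 'width': 8.0, 'height': 4.1, 'rotation': 0, 'id': 'e5f6a7b8', 'score': 0.88, 'parent_id': 'a1b2c3d4'},
    ]
    text_annotations = [
        {'text': ['Grand total'], 'id': 'a1b2c3d4'},
        {'text': ['total'], 'id': 'e5f6a7b8', 'parent_id': 'a1b2c3d4'},
    ]
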
+
+
+    @classmethod
+    def _calculate_similarity(cls, text: str, reference_texts: List[str]) -> Tuple[float, str]:
+        """
+        Calculate similarity between a text and substrings within reference texts.
+
+        Args:
+            text: The text to compare
+            reference_texts: List of reference texts
+
+        Returns:
+            Similarity score between 0 and 1, and the best matching reference text
+        """
+        # Convert to lowercase for case-insensitive comparison
+        text = text.lower()
+
+        best_score = 0
+        best_match = None
+
+        if reference_texts:
+            for ref_text in reference_texts:
+                ref_text_lower = ref_text.lower()
+                best_window_score = fuzz.partial_ratio(text, ref_text_lower)
+                if best_window_score > best_score:
+                    best_score = best_window_score
+                    best_match = ref_text
+            best_score = float(best_score) / 100
+        logger.debug(f"Best substring similarity between '{text}' and '{best_match}': {best_score}")
+
+        return best_score, best_match
+
+    def _filter_ocr_results(self, ocr_results: Dict, reference_texts: List[str]) -> Dict:
+        """
+        Filter OCR results based on their similarity to the reference texts.
+
+        Args:
+            ocr_results: Dict of OCR results with 'bboxes', 'texts' and 'scores' lists
+            reference_texts: List of reference texts
+
+        Returns:
+            Dict of filtered OCR results in the same format
+        """
+        filtered_results = {
+            'bboxes': [],
+            'texts': [],
+            'scores': []
+        }
+        for bbox, text, score in zip(ocr_results['bboxes'], ocr_results['texts'], ocr_results['scores']):
+            # Simple similarity function - can be replaced with more sophisticated methods
+            similarity, best_match = self._calculate_similarity(text, reference_texts)
+            if similarity >= 0.9:
+                filtered_results['bboxes'].append(bbox)
+                filtered_results['texts'].append(best_match)
+                filtered_results['scores'].append(score)
+
+        return filtered_results
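
How `fuzz.partial_ratio` behaves for the fuzzy-substring checks used here (scores are integers from 0 to 100, hence the `/ 100` above and the `>= 95` threshold below):

    from thefuzz import fuzz

    print(fuzz.partial_ratio("total", "grand total:"))  # 100 - exact substring
    print(fuzz.partial_ratio("tota1", "grand total:"))  # 80  - one character off
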
+
+    def _filter_ocr_results_v2(self, ocr_results: Dict, reference_texts: List[str]) -> Dict[str, List]:
+
+        output = {}
+        for ref_text in reference_texts:
+            ref_text_lower = ref_text.lower()
+            output[ref_text] = {
+                'bboxes': [],
+                'texts': [],
+                'scores': []
+            }
+            for text, score, bbox in zip(ocr_results['texts'], ocr_results['scores'], ocr_results['bboxes']):
+                text_lower = text.lower()
+                # check if text is a fuzzy substring of ref_text
+                similarity = fuzz.partial_ratio(text_lower, ref_text_lower)
+                if similarity >= 95:
+                    output[ref_text]['bboxes'].append(bbox)
+                    output[ref_text]['texts'].append(text)
+                    output[ref_text]['scores'].append(score)
+
+        # Filter to keep only horizontally aligned bounding boxes
+        for ref_text in output:
+            if not output[ref_text]['bboxes']:
+                continue
+
+            # Group bounding boxes by their vertical position (y-coordinate),
+            # using the middle y-coordinate of each box for grouping
+            y_groups = {}
+            for i, bbox in enumerate(output[ref_text]['bboxes']):
+                # Calculate the middle y-coordinate of the bounding box
+                # bbox format is [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]
+                y_values = [point[1] for point in bbox]
+                mid_y = sum(y_values) / len(y_values)
+
+                # Group with a tolerance of 10 pixels
+                group_key = int(mid_y / 10) * 10
+                if group_key not in y_groups:
+                    y_groups[group_key] = []
+                y_groups[group_key].append(i)
+
+            # Find the group with the maximum number of bounding boxes
+            max_group_key = max(y_groups.keys(), key=lambda k: len(y_groups[k]), default=None)
+
+            if max_group_key is not None:
+                # Keep only the bounding boxes in the largest horizontal group
+                indices_to_keep = y_groups[max_group_key]
+
+                # Create new filtered lists
+                filtered_bboxes = [output[ref_text]['bboxes'][i] for i in indices_to_keep]
+                filtered_texts = [output[ref_text]['texts'][i] for i in indices_to_keep]
+                filtered_scores = [output[ref_text]['scores'][i] for i in indices_to_keep]
+
+                # Sort bounding boxes by x-coordinate to maintain reading order
+                sorted_indices = sorted(range(len(filtered_bboxes)),
+                                        key=lambda i: min(point[0] for point in filtered_bboxes[i]))
+
+                filtered_bboxes = [filtered_bboxes[i] for i in sorted_indices]
+                filtered_texts = [filtered_texts[i] for i in sorted_indices]
+                filtered_scores = [filtered_scores[i] for i in sorted_indices]
+
+                # Create a combined bounding box that encompasses all individual boxes
+                if filtered_bboxes:
+                    # Find min and max coordinates across all bounding boxes
+                    all_x = [point[0] for bbox in filtered_bboxes for point in bbox]
+                    all_y = [point[1] for bbox in filtered_bboxes for point in bbox]
+
+                    min_x, max_x = min(all_x), max(all_x)
+                    min_y, max_y = min(all_y), max(all_y)
+
+                    # Create a new bounding box with the min/max coordinates
+                    combined_bbox = [
+                        [min_x, min_y],  # top-left
+                        [max_x, min_y],  # top-right
+                        [max_x, max_y],  # bottom-right
+                        [min_x, max_y]   # bottom-left
+                    ]
+
+                    # Calculate the average score
+                    avg_score = sum(filtered_scores) / len(filtered_scores) if filtered_scores else 0
+
+                    # Add the combined bounding box to the results
+                    filtered_bboxes.insert(0, combined_bbox)
+                    filtered_texts.insert(0, ref_text)  # Use the reference text for the combined box
+                    filtered_scores.insert(0, avg_score)
+
+                # Update the output with filtered results
+                output[ref_text]['bboxes'] = filtered_bboxes
+                output[ref_text]['texts'] = filtered_texts
+                output[ref_text]['scores'] = filtered_scores
+
+        return output
+
+    def _get_labels(self) -> List[str]:
+        # TODO: validate that the labels come from the Labels tag; use the control tag name
+        # format: {'StartDate': LabelTag(attr={'value': 'StartDate', 'background': 'red'}, tag='Label', value='StartDate', parent_name='columns'), 'EndDate': LabelTag(attr={'value': 'EndDate', 'background': 'green'}, tag='Label', value='EndDate', parent_name='columns'), 'Amount': LabelTag(attr={'value': 'Amount'}, tag='Label', value='Amount', parent_name='columns')}
+        return list(self.label_interface.labels)[0]
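
The 10-pixel y-bucketing in `_filter_ocr_results_v2`, shown in isolation: boxes whose mid-y falls into the same 10 px band share a key, so the words of one printed line usually land in one group. Note that boxes straddling a band edge can still split into neighboring buckets; a tolerance-based merge would be stricter.

    mid_ys = [52.0, 55.5, 58.9, 104.0]
    buckets = {}
    for i, mid_y in enumerate(mid_ys):
        buckets.setdefault(int(mid_y / 10) * 10, []).append(i)
    print(buckets)  # {50: [0, 1, 2], 100: [3]}
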
+
+
+    async def aapply(
+        self,
+        input: InternalDataFrame,
+        runtime: AsyncRuntime,
+    ) -> InternalDataFrame:
+
+        labels = self._get_labels()
+        # validate labels
+        from adala.utils.pydantic_generator import field_schema_to_pydantic_class
+        LineItem = field_schema_to_pydantic_class(
+            class_name="LineItem",
+            description="A single line extracted from the document",
+            field_schema={label: {"type": "string"} for label in labels}
+        )
+
+        class ResponseModel(BaseModel):
+            lines: List[LineItem]
+
+        input_field_types = defaultdict(lambda: MessageChunkType.TEXT)
+        image_value_key = None
+        for tag in self.image_tags:
+            # these are the project variable names, NOT the label config tag names. TODO: pass this info from LSE to avoid recomputing it here.
+            variables = extract_variable_name(tag.value)
+            if len(variables) != 1:
+                logger.warning(
+                    f"Image tag {tag.name} does not reference exactly one variable: {variables}. Cannot mark these variables as image inputs."
+                )
+                continue
+            image_value_key = variables[0]
+            input_field_types[image_value_key] = (
+                MessageChunkType.IMAGE_URLS
+                if tag.attr.get("valueList")
+                else MessageChunkType.IMAGE_URL
+            )
+
+        logger.debug(
+            f"Using VisionRuntime with input field types: {input_field_types}"
+        )
+        output = await runtime.batch_to_batch(
+            input,
+            input_template=self.input_template,
+            output_template="",
+            instructions_template=self.instructions,
+            response_model=ResponseModel,
+            input_field_types=input_field_types,
+        )
+        logger.debug(f'Output: {output}')
+
+        images = input[image_value_key].tolist()
+        all_bbox_annotations = []
+        all_text_annotations = []
+        all_label_annotations = []
+        for i, row in output.iterrows():
+            extracted_results = row['lines']
+
+            ocr_results = find_text_in_image(images[i], extracted_results)
+            bbox_annotations = []
+            text_annotations = []
+            label_annotations = []
+            for ocr_result in ocr_results:
+                # Add bbox annotation
+                bbox_id = ocr_result['element']['id']
+                parent_id = ocr_result['element'].get('parent_id')
+
+                bbox_annotation = ocr_result['element']
+                bbox_annotation['score'] = ocr_result['matching_score'] * ocr_result['element']['score']
+
+                bbox_annotations.append(bbox_annotation)
+
+                # Add text annotation
+                text_annotation = {
+                    'text': [ocr_result['reference_text']],
+                    'id': bbox_id
+                }
+                if parent_id:
+                    text_annotation['parent_id'] = parent_id
+                text_annotations.append(text_annotation)
+
+                # Add label annotation
+                label = ocr_result.pop('reference_label', None)
+                if label:
+                    label_annotation = {
+                        'labels': [label],
+                        'id': bbox_id
+                    }
+                    if parent_id:
+                        label_annotation['parent_id'] = parent_id
+                    label_annotations.append(label_annotation)
+
+            all_bbox_annotations.append(bbox_annotations)
+            all_text_annotations.append(text_annotations)
+            all_label_annotations.append(label_annotations)
+        output['bbox'] = all_bbox_annotations
+        output['transcription'] = all_text_annotations
+        output['columns'] = all_label_annotations
+        return output
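
What the dynamically built response model amounts to, sketched with plain pydantic (`field_schema_to_pydantic_class` is the adala helper used above; the label names are the illustrative ones from the `_get_labels` comment):

    from typing import List
    from pydantic import BaseModel, create_model

    LineItem = create_model(
        "LineItem",
        StartDate=(str, ...),
        EndDate=(str, ...),
        Amount=(str, ...),
    )

    class ResponseModel(BaseModel):
        lines: List[LineItem]
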
+
+
+    # with json_schema_to_pydantic(self.field_schema) as ResponseModel:
+    #     # special handling to flag image inputs if they exist
+    #     input_field_types = defaultdict(lambda: MessageChunkType.TEXT)
+    #     image_value_key = None
+    #     for tag in self.image_tags:
+    #         # these are the project variable names, NOT the label config tag names. TODO: pass this info from LSE to avoid recomputing it here.
+    #         variables = extract_variable_name(tag.value)
+    #         if len(variables) != 1:
+    #             logger.warning(
+    #                 f"Image tag {tag.name} has multiple variables: {variables}. Cannot mark these variables as image inputs."
+    #             )
+    #             continue
+    #         image_value_key = variables[0]
+    #         input_field_types[image_value_key] = (
+    #             MessageChunkType.IMAGE_URLS
+    #             if tag.attr.get("valueList")
+    #             else MessageChunkType.IMAGE_URL
+    #         )
+
+    #     logger.debug(
+    #         f"Using VisionRuntime with input field types: {input_field_types}"
+    #     )
+    #     output = await runtime.batch_to_batch(
+    #         input,
+    #         input_template=self.input_template,
+    #         output_template="",
+    #         instructions_template=self.instructions,
+    #         response_model=ResponseModel,
+    #         input_field_types=input_field_types,
+    #     )
+    #     print(f'Output: {output}')
+    #     print(f'Process images with OCR: {input[image_value_key].tolist()}')
+    #     # ocr_results = await self.process_images_with_ocr(input[image_value_key].tolist())
+    #     # filtered_ocr_results = []
+    #     images = input[image_value_key].tolist()
+    #     all_bbox_annotations = []
+    #     all_text_annotations = []
+    #     for i, row in output.iterrows():
+    #         extracted_result = row['output']
+
+    #         ocr_results = find_text_in_image(images[i], extracted_result)
+    #         bbox_annotations = []
+    #         text_annotations = []
+    #         for ocr_result in ocr_results:
+    #             # Convert OCR results to Label Studio format
+    #             parent_id = ocr_result['bbox']['id']
+
+    #             for word in ocr_result['words']:
+    #                 bbox_annotation = word['bbox']
+    #                 bbox_annotation['rotation'] = 0
+    #                 bbox_annotation['parent_id'] = parent_id
+    #                 bbox_annotation['score'] = word['score']
+
+    #                 text_annotation = {
+    #                     'text': [word['text']],
+    #                     'id': bbox_annotation['id'],
+    #                     'parent_id': parent_id
+    #                 }
+
+    #                 bbox_annotations.append(bbox_annotation)
+    #                 text_annotations.append(text_annotation)
+
+    #             bbox_annotation = ocr_result['bbox']
+    #             bbox_annotation['rotation'] = 0
+    #             bbox_annotation['score'] = float(np.sqrt(ocr_result['detection_score'] * ocr_result['matching_score']))
+
+    #             text_annotation = {
+    #                 'text': [ocr_result['reference_text']],
+    #                 'id': parent_id
+    #             }
+
+    #             bbox_annotations.append(bbox_annotation)
+    #             text_annotations.append(text_annotation)
+
+    #         all_bbox_annotations.append(bbox_annotations)
+    #         all_text_annotations.append(text_annotations)
+    #     output['bbox'] = all_bbox_annotations
+    #     output['transcription'] = all_text_annotations
+    #     return output
diff --git a/poetry.lock b/poetry.lock
index 3467cbcb..85b81803 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3696,7 +3696,7 @@ optional = false
 python-versions = ">=3.9,<4"
 groups = ["main"]
 files = [
-    {file = "388257eeea2ed95ae769724bb8b6ad9c41511b96.zip", hash = "sha256:c86f91aebd989d762663aa49f4fff5dc128f168f70e4540b9d899e456a006f01"},
+    {file = "2b0d2cc9403c83cd99dc3e6cc06a580d9e9db4f1.zip", hash = "sha256:118574bddf431edf96d564f55173f6e0f104d233c1ebc3dac8ca68decd4fc6f4"},
 ]

 [package.dependencies]
@@ -3722,7 +3722,7 @@ xmljson = "0.2.1"

 [package.source]
 type = "url"
-url = "https://github.com/HumanSignal/label-studio-sdk/archive/388257eeea2ed95ae769724bb8b6ad9c41511b96.zip"
+url = "https://github.com/HumanSignal/label-studio-sdk/archive/2b0d2cc9403c83cd99dc3e6cc06a580d9e9db4f1.zip"

 [[package]]
 name = "litellm"
@@ -6063,6 +6063,22 @@ files = [
 [package.extras]
 dev = ["build", "flake8", "mypy", "pytest", "twine"]

+[[package]]
+name = "pytesseract"
+version = "0.3.13"
+description = "Python-tesseract is a python wrapper for Google's Tesseract-OCR"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "pytesseract-0.3.13-py3-none-any.whl", hash = "sha256:7a99c6c2ac598360693d83a416e36e0b33a67638bb9d77fdcac094a3589d4b34"},
+    {file = "pytesseract-0.3.13.tar.gz", hash = "sha256:4bf5f880c99406f52a3cfc2633e42d9dc67615e69d8a509d74867d3baddb5db9"},
+]
+
+[package.dependencies]
+packaging = ">=21.3"
+Pillow = ">=8.0.0"
+
 [[package]]
 name = "pytest"
 version = "7.4.4"
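
Minimal usage of the pytesseract dependency added here, against the Tesseract binaries and language packs installed in Dockerfile.app (a sketch; the file path is an example):

    from PIL import Image
    import pytesseract

    img = Image.open("receipt.png")
    print(pytesseract.image_to_string(img, lang="eng+chi_sim"))
    # word-level boxes, if needed:
    data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
    # data["text"], data["left"], data["top"], data["width"], data["height"]
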
"sha256:4bf5f880c99406f52a3cfc2633e42d9dc67615e69d8a509d74867d3baddb5db9"}, +] + +[package.dependencies] +packaging = ">=21.3" +Pillow = ">=8.0.0" + [[package]] name = "pytest" version = "7.4.4" @@ -6524,6 +6540,113 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} +[[package]] +name = "rapidfuzz" +version = "3.12.2" +description = "rapid fuzzy string matching" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "rapidfuzz-3.12.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0b9a75e0385a861178adf59e86d6616cbd0d5adca7228dc9eeabf6f62cf5b0b1"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6906a7eb458731e3dd2495af1d0410e23a21a2a2b7ced535e6d5cd15cb69afc5"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4b3334a8958b689f292d5ce8a928140ac98919b51e084f04bf0c14276e4c6ba"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:85a54ce30345cff2c79cbcffa063f270ad1daedd0d0c3ff6e541d3c3ba4288cf"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:acb63c5072c08058f8995404201a52fc4e1ecac105548a4d03c6c6934bda45a3"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5385398d390c6571f0f2a7837e6ddde0c8b912dac096dc8c87208ce9aaaa7570"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5032cbffa245b4beba0067f8ed17392ef2501b346ae3c1f1d14b950edf4b6115"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:195adbb384d89d6c55e2fd71e7fb262010f3196e459aa2f3f45f31dd7185fe72"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f43b773a4d4950606fb25568ecde5f25280daf8f97b87eb323e16ecd8177b328"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:55a43be0e0fa956a919043c19d19bd988991d15c59f179d413fe5145ed9deb43"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:71cf1ea16acdebe9e2fb62ee7a77f8f70e877bebcbb33b34e660af2eb6d341d9"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a3692d4ab36d44685f61326dca539975a4eda49b2a76f0a3df177d8a2c0de9d2"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-win32.whl", hash = "sha256:09227bd402caa4397ba1d6e239deea635703b042dd266a4092548661fb22b9c6"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-win_amd64.whl", hash = "sha256:0f05b7b95f9f87254b53fa92048367a8232c26cee7fc8665e4337268c3919def"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-win_arm64.whl", hash = "sha256:6938738e00d9eb6e04097b3f565097e20b0c398f9c58959a2bc64f7f6be3d9da"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9c4d984621ae17404c58f8d06ed8b025e167e52c0e6a511dfec83c37e9220cd"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f9132c55d330f0a1d34ce6730a76805323a6250d97468a1ca766a883d6a9a25"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b343b6cb4b2c3dbc8d2d4c5ee915b6088e3b144ddf8305a57eaab16cf9fc74"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24081077b571ec4ee6d5d7ea0e49bc6830bf05b50c1005028523b9cd356209f3"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:c988a4fc91856260355773bf9d32bebab2083d4c6df33fafeddf4330e5ae9139"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:780b4469ee21cf62b1b2e8ada042941fd2525e45d5fb6a6901a9798a0e41153c"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:edd84b0a323885493c893bad16098c5e3b3005d7caa995ae653da07373665d97"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efa22059c765b3d8778083805b199deaaf643db070f65426f87d274565ddf36a"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:095776b11bb45daf7c2973dd61cc472d7ea7f2eecfa454aef940b4675659b92f"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7e2574cf4aa86065600b664a1ac7b8b8499107d102ecde836aaaa403fc4f1784"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d5a3425a6c50fd8fbd991d8f085ddb504791dae6ef9cc3ab299fea2cb5374bef"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:97fb05e1ddb7b71a054040af588b0634214ee87cea87900d309fafc16fd272a4"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-win32.whl", hash = "sha256:b4c5a0413589aef936892fbfa94b7ff6f7dd09edf19b5a7b83896cc9d4e8c184"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-win_amd64.whl", hash = "sha256:58d9ae5cf9246d102db2a2558b67fe7e73c533e5d769099747921232d88b9be2"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-win_arm64.whl", hash = "sha256:7635fe34246cd241c8e35eb83084e978b01b83d5ef7e5bf72a704c637f270017"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1d982a651253ffe8434d9934ff0c1089111d60502228464721a2a4587435e159"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:02e6466caa0222d5233b1f05640873671cd99549a5c5ba4c29151634a1e56080"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e956b3f053e474abae69ac693a52742109d860ac2375fe88e9387d3277f4c96c"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dee7d740a2d5418d4f964f39ab8d89923e6b945850db833e798a1969b19542a"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a057cdb0401e42c84b6516c9b1635f7aedd5e430c6e388bd5f6bcd1d6a0686bb"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dccf8d4fb5b86d39c581a59463c596b1d09df976da26ff04ae219604223d502f"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21d5b3793c6f5aecca595cd24164bf9d3c559e315ec684f912146fc4e769e367"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:46a616c0e13cff2de1761b011e0b14bb73b110182f009223f1453d505c9a975c"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:19fa5bc4301a1ee55400d4a38a8ecf9522b0391fc31e6da5f4d68513fe5c0026"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:544a47190a0d25971658a9365dba7095397b4ce3e897f7dd0a77ca2cf6fa984e"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:f21af27c5e001f0ba1b88c36a0936437dfe034c452548d998891c21125eb640f"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b63170d9db00629b5b3f2862114d8d6ee19127eaba0eee43762d62a25817dbe0"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-win32.whl", hash = 
"sha256:6c7152d77b2eb6bfac7baa11f2a9c45fd5a2d848dbb310acd0953b3b789d95c9"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-win_amd64.whl", hash = "sha256:1a314d170ee272ac87579f25a6cf8d16a031e1f7a7b07663434b41a1473bc501"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-win_arm64.whl", hash = "sha256:d41e8231326e94fd07c4d8f424f6bed08fead6f5e6688d1e6e787f1443ae7631"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:941f31038dba5d3dedcfcceba81d61570ad457c873a24ceb13f4f44fcb574260"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fe2dfc454ee51ba168a67b1e92b72aad251e45a074972cef13340bbad2fd9438"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78fafaf7f5a48ee35ccd7928339080a0136e27cf97396de45259eca1d331b714"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0c7989ff32c077bb8fd53253fd6ca569d1bfebc80b17557e60750e6909ba4fe"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96fa00bc105caa34b6cd93dca14a29243a3a7f0c336e4dcd36348d38511e15ac"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bccfb30c668620c5bc3490f2dc7d7da1cca0ead5a9da8b755e2e02e2ef0dff14"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f9b0adc3d894beb51f5022f64717b6114a6fabaca83d77e93ac7675911c8cc5"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:32691aa59577f42864d5535cb6225d0f47e2c7bff59cf4556e5171e96af68cc1"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:758b10380ad34c1f51753a070d7bb278001b5e6fcf544121c6df93170952d705"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:50a9c54c0147b468363119132d514c5024fbad1ed8af12bd8bd411b0119f9208"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e3ceb87c11d2d0fbe8559bb795b0c0604b84cfc8bb7b8720b5c16e9e31e00f41"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f7c9a003002434889255ff5676ca0f8934a478065ab5e702f75dc42639505bba"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-win32.whl", hash = "sha256:cf165a76870cd875567941cf861dfd361a0a6e6a56b936c5d30042ddc9def090"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-win_amd64.whl", hash = "sha256:55bcc003541f5f16ec0a73bf6de758161973f9e8d75161954380738dd147f9f2"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-win_arm64.whl", hash = "sha256:69f6ecdf1452139f2b947d0c169a605de578efdb72cbb2373cb0a94edca1fd34"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4c852cd8bed1516a64fd6e2d4c6f270d4356196ee03fda2af1e5a9e13c34643"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42e7f747b55529a6d0d1588695d71025e884ab48664dca54b840413dea4588d8"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a749fd2690f24ef256b264a781487746bbb95344364fe8fe356f0eef7ef206ba"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a11e1d036170bbafa43a9e63d8c309273564ec5bdfc5439062f439d1a16965a"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dfb337f1832c1231e3d5621bd0ebebb854e46036aedae3e6a49c1fc08f16f249"}, + {file = 
"rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e88c6e68fca301722fa3ab7fd3ca46998012c14ada577bc1e2c2fc04f2067ca6"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17e1a3a8b4b5125cfb63a6990459b25b87ea769bdaf90d05bb143f8febef076a"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b9f8177b24ccc0a843e85932b1088c5e467a7dd7a181c13f84c684b796bea815"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6c506bdc2f304051592c0d3b0e82eed309248ec10cdf802f13220251358375ea"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:30bf15c1ecec2798b713d551df17f23401a3e3653ad9ed4e83ad1c2b06e86100"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:bd9a67cfc83e8453ef17ddd1c2c4ce4a74d448a197764efb54c29f29fb41f611"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7a6eaec2ef658dd650c6eb9b36dff7a361ebd7d8bea990ce9d639b911673b2cb"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-win32.whl", hash = "sha256:d7701769f110332cde45c41759cb2a497de8d2dca55e4c519a46aed5fbb19d1a"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-win_amd64.whl", hash = "sha256:296bf0fd4f678488670e262c87a3e4f91900b942d73ae38caa42a417e53643b1"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-win_arm64.whl", hash = "sha256:7957f5d768de14f6b2715303ccdf224b78416738ee95a028a2965c95f73afbfb"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e5fd3ce849b27d063755829cda27a9dab6dbd63be3801f2a40c60ec563a4c90f"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:54e53662d71ed660c83c5109127c8e30b9e607884b7c45d2aff7929bbbd00589"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b9e43cf2213e524f3309d329f1ad8dbf658db004ed44f6ae1cd2919aa997da5"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29ca445e320e5a8df3bd1d75b4fa4ecfa7c681942b9ac65b55168070a1a1960e"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83eb7ef732c2f8533c6b5fbe69858a722c218acc3e1fc190ab6924a8af7e7e0e"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:648adc2dd2cf873efc23befcc6e75754e204a409dfa77efd0fea30d08f22ef9d"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9b1e6f48e1ffa0749261ee23a1c6462bdd0be5eac83093f4711de17a42ae78ad"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:1ae9ded463f2ca4ba1eb762913c5f14c23d2e120739a62b7f4cc102eab32dc90"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dda45f47b559be72ecbce45c7f71dc7c97b9772630ab0f3286d97d2c3025ab71"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3745c6443890265513a3c8777f2de4cb897aeb906a406f97741019be8ad5bcc"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36d3ef4f047ed1bc96fa29289f9e67a637ddca5e4f4d3dc7cb7f50eb33ec1664"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:54bb69ebe5ca0bd7527357e348f16a4c0c52fe0c2fcc8a041010467dcb8385f7"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = 
"sha256:3f2ddd5b99b254039a8c82be5749d4d75943f62eb2c2918acf6ffd586852834f"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:8117dab9b26a1aaffab59b4e30f80ac4d55e61ad4139a637c149365960933bee"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40c0f16d62d6553527de3dab2fb69709c4383430ea44bce8fb4711ed4cbc6ae3"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f177e1eb6e4f5261a89c475e21bce7a99064a8f217d2336fb897408f46f0ceaf"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5df0cecc2852fcb078ed1b4482fac4fc2c2e7787f3edda8920d9a4c0f51b1c95"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3b3c4df0321df6f8f0b61afbaa2ced9622750ee1e619128db57a18533d139820"}, + {file = "rapidfuzz-3.12.2.tar.gz", hash = "sha256:b0ba1ccc22fff782e7152a3d3d0caca44ec4e32dc48ba01c560b8593965b5aa3"}, +] + +[package.extras] +all = ["numpy"] + [[package]] name = "redis" version = "5.0.8" @@ -7195,6 +7318,21 @@ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["pre-commit", "pytest (>=7.0)", "pytest-timeout"] typing = ["mypy (>=1.6,<2.0)", "traitlets (>=5.11.1)"] +[[package]] +name = "thefuzz" +version = "0.22.1" +description = "Fuzzy string matching in python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "thefuzz-0.22.1-py3-none-any.whl", hash = "sha256:59729b33556850b90e1093c4cf9e618af6f2e4c985df193fdf3c5b5cf02ca481"}, + {file = "thefuzz-0.22.1.tar.gz", hash = "sha256:7138039a7ecf540da323792d8592ef9902b1d79eb78c147d4f20664de79f3680"}, +] + +[package.dependencies] +rapidfuzz = ">=3.0.0,<4.0.0" + [[package]] name = "tiktoken" version = "0.7.0" @@ -8467,4 +8605,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.9.7 || >3.9.7,<3.13" -content-hash = "8930e9c676607922cdb1bfece4b688db95871e2d5b80c8bde2518d3311ad2675" +content-hash = "167ce58f336fb164546c14a06fe12ae870266918f4290ab8b41d31e4b72d8307" diff --git a/pyproject.toml b/pyproject.toml index 42a677de..d230b4a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ celery = {version = "^5.3.6", extras = ["redis"]} kombu = ">=5.4.0rc2" # Pin version to fix https://github.com/celery/celery/issues/8030. TODO: remove when this fix will be included in celery uvicorn = "*" pydantic-settings = "^2.2.1" -label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/388257eeea2ed95ae769724bb8b6ad9c41511b96.zip"} +label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/2b0d2cc9403c83cd99dc3e6cc06a580d9e9db4f1.zip"} kafka-python-ng = "^2.2.3" requests = "^2.32.0" # Using litellm from forked repo until vertex fix is released: https://github.com/BerriAI/litellm/issues/7904 @@ -50,6 +50,8 @@ requests = "^2.32.0" litellm = {url = "https://github.com/HumanSignal/litellm/archive/c0506d5844ef20d0db14144fbcbf99c05637bde3.zip"} pandarallel = "^1.6.5" instructor = "^1.4.3" +thefuzz = "0.22.1" +pytesseract = "^0.3.13" [tool.poetry.group.dev.dependencies] pytest = "^7.4.3"