From 0fe22df4ce7ffa6a5b1a7eb4749d15aa1d6a8b8b Mon Sep 17 00:00:00 2001 From: nik Date: Wed, 5 Mar 2025 14:58:13 +0100 Subject: [PATCH 01/10] Add skill for image+bbox extraction --- adala/skills/__init__.py | 1 + .../collection/label_studio_image_ocr.py | 384 ++++++++++++++++++ 2 files changed, 385 insertions(+) create mode 100644 adala/skills/collection/label_studio_image_ocr.py diff --git a/adala/skills/__init__.py b/adala/skills/__init__.py index 6cd7c3fa..65583f39 100644 --- a/adala/skills/__init__.py +++ b/adala/skills/__init__.py @@ -4,4 +4,5 @@ from .collection.rag import RAGSkill from .collection.ontology_creation import OntologyCreator, OntologyMerger from .collection.label_studio import LabelStudioSkill +from .collection.label_studio_image_ocr import LabelStudioSkillImageOCR from ._base import Skill, TransformSkill, AnalysisSkill, SynthesisSkill diff --git a/adala/skills/collection/label_studio_image_ocr.py b/adala/skills/collection/label_studio_image_ocr.py new file mode 100644 index 00000000..1171eb8b --- /dev/null +++ b/adala/skills/collection/label_studio_image_ocr.py @@ -0,0 +1,384 @@ +import re +import logging +import pandas as pd +from typing import List, Optional, Type +from functools import cached_property +from copy import deepcopy +from collections import defaultdict +import aiohttp +import base64 +import asyncio +import io +from PIL import Image +from urllib.parse import urlparse +import uuid +from adala.skills._base import TransformSkill +from adala.runtimes import AsyncLiteLLMVisionRuntime +from adala.runtimes._litellm import MessageChunkType +from pydantic import BaseModel, Field, model_validator, computed_field +from difflib import SequenceMatcher + + +from adala.runtimes import Runtime, AsyncRuntime +from adala.utils.internal_data import InternalDataFrame + +from label_studio_sdk.label_interface import LabelInterface +from label_studio_sdk.label_interface.control_tags import ControlTag, ObjectTag +from label_studio_sdk._extensions.label_studio_tools.core.utils.json_schema import ( + json_schema_to_pydantic, +) + + +logger = logging.getLogger(__name__) + + +def extract_variable_name(input_string): + """Extract variable name in which would be specified as $""" + pattern = r"\$([a-zA-Z0-9_]+)" + matches = re.findall(pattern, input_string) + return matches + + +class LabelStudioSkillImageOCR(TransformSkill): + + name: str = "label_studio" + input_template: str = "Annotate the input data according to the provided schema." + # TODO: remove output_template, fix calling @model_validator(mode='after') in the base class + output_template: str = "Output: {field_name}" + response_model: Type[BaseModel] = ( + BaseModel # why validate_response_model is called in the base class? + ) + # ------------------------------ + label_config: str = "" + allowed_control_tags: Optional[list[str]] = None + allowed_object_tags: Optional[list[str]] = None + + # TODO: implement postprocessing to verify Taxonomy + + @cached_property + def label_interface(self) -> LabelInterface: + return LabelInterface(self.label_config) + + @cached_property + def image_tags(self) -> List[ObjectTag]: + # check if any image tags are used as input variables + object_tag_names = self.allowed_object_tags or list( + self.label_interface._objects.keys() + ) + tags = [] + for tag_name in object_tag_names: + tag = self.label_interface.get_object(tag_name) + if tag.tag.lower() == "image": + tags.append(tag) + return tags + + def __getstate__(self): + """Exclude cached properties when pickling - otherwise the 'Agent' can not be serialized in celery""" + state = deepcopy(super().__getstate__()) + # Remove cached_property values + for key in ["label_interface", "ner_tags", "image_tags"]: + state["__dict__"].pop(key, None) + return state + + @model_validator(mode="after") + def validate_response_model(self): + + logger.debug(f"Read labeling config {self.label_config}") + + if self.allowed_control_tags or self.allowed_object_tags: + if self.allowed_control_tags: + control_tags = { + tag: self.label_interface._controls[tag] + for tag in self.allowed_control_tags + } + else: + control_tags = self.label_interface._controls + if self.allowed_object_tags: + object_tags = { + tag: self.label_interface._objects[tag] + for tag in self.allowed_object_tags + } + else: + object_tags = self.label_interface._objects + interface = LabelInterface.create_instance( + tags={**control_tags, **object_tags} + ) + logger.debug( + f"Filtered labeling config based on allowed tags {self.allowed_control_tags=} and {self.allowed_object_tags=} to {interface.config}" + ) + else: + interface = self.label_interface + + # NOTE: filtered label config is used for the response model, but full label config is used for the prompt, so that the model has as much context as possible. + self.field_schema = interface.to_json_schema() + logger.debug(f"Converted labeling config to json schema: {self.field_schema}") + + return self + + def _create_response_model_from_field_schema(self): + pass + + def apply( + self, + input: InternalDataFrame, + runtime: Runtime, + ) -> InternalDataFrame: + + with json_schema_to_pydantic(self.field_schema) as ResponseModel: + return runtime.batch_to_batch( + input, + input_template=self.input_template, + output_template="", + instructions_template=self.instructions, + response_model=ResponseModel, + ) + + @classmethod + async def process_images_with_ocr(cls, images: list) -> list: + """ + Process a list of images with OCR by calling the OCR service. + + Args: + images: List of image data (URLs or base64 strings) + + Returns: + List of OCR results for each image + """ + + async def process_single_image(image_data): + # Check if the image is a URL + is_url = False + try: + parsed = urlparse(image_data) + is_url = all([parsed.scheme, parsed.netloc]) + except: + is_url = False + + if not is_url: + logger.warning(f"Image data is not a URL. OCR service requires URLs or base64 data.") + return None + + # Download the image and convert to base64 + async with aiohttp.ClientSession() as session: + try: + async with session.get(image_data) as response: + if response.status == 200: + image_bytes = await response.read() + # Get image dimensions + image = Image.open(io.BytesIO(image_bytes)) + width, height = image.size + # Convert to base64 + base64_data = base64.b64encode(image_bytes).decode('utf-8') + else: + error_text = await response.text() + logger.error(f"Failed to download image: {response.status}, {error_text}") + return None + except Exception as e: + logger.error(f"Error downloading image: {str(e)}") + return None + + # Call the OCR service with base64 data + ocr_url = "https://llm-ocr-server.appx.humansignal.com/ocr/base64" + + # Prepare form data - this is the key change + form_data = aiohttp.FormData() + form_data.add_field('image_data', base64_data) + form_data.add_field('confidence_threshold', str(0.3)) + form_data.add_field('languages', 'en,ch_sim') + + async with aiohttp.ClientSession() as session: + try: + async with session.post(ocr_url, data=form_data) as response: + if response.status == 200: + json_response = await response.json() + return { + "ocr_data": json_response, + "image_width": width, + "image_height": height + } + else: + error_text = await response.text() + logger.error(f"OCR service returned error: {response.status}, {error_text}") + return None + except Exception as e: + logger.error(f"Error calling OCR service: {str(e)}") + return None + + # Process all images concurrently + tasks = [process_single_image(image) for image in images] + results = await asyncio.gather(*tasks) + + # normalize EasyOCR results to RectangleLabels bounding boxes format of Label Studio + normalized_results = [] + for result in results: + if not result: + continue + + # Extract OCR response data + bboxes = result.get('ocr_data', {}).get('bboxes', []) + texts = result.get('ocr_data', {}).get('texts', []) + scores = result.get('ocr_data', {}).get('scores', []) + original_width = result.get('image_width', 1000) + original_height = result.get('image_height', 1000) + + # Convert to Label Studio format + label_studio_results = [] + + for i, (bbox, text, score) in enumerate(zip(bboxes, texts, scores)): + # EasyOCR bboxes format is [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] + # We need to convert to Label Studio format with x,y as top-left corner + # and width, height as percentages + + # Calculate top-left corner (minimum x and y) + x_values = [point[0] for point in bbox] + y_values = [point[1] for point in bbox] + + min_x = min(x_values) + min_y = min(y_values) + + # Calculate width and height + max_x = max(x_values) + max_y = max(y_values) + width = max_x - min_x + height = max_y - min_y + + # Convert to percentages + x_percent = (min_x / original_width) * 100 + y_percent = (min_y / original_height) * 100 + width_percent = (width / original_width) * 100 + height_percent = (height / original_height) * 100 + + # generate unique id for the annotation + id_gen = str(uuid.uuid4())[:8] + # Create Label Studio format annotation + annotation = { + "x": x_percent, + "y": y_percent, + "width": width_percent, + "height": height_percent, + "rectanglelabels": ['Transcription'], # TODO: customize + "score": score, + "text": text + } + + label_studio_results.append(annotation) + + # Replace the OCR result with the Label Studio formatted result + normalized_results.append(label_studio_results) + + return normalized_results + + def _calculate_similarity(self, text: str, reference_texts: List[str]) -> float: + """ + Calculate similarity between a text and a list of reference texts + + Args: + text: The text to compare + reference_texts: List of reference texts + + Returns: + Similarity score between 0 and 1 + """ + # Convert to lowercase for case-insensitive comparison + text = text.lower() + reference_texts = [ref_text.lower() for ref_text in reference_texts] + + # Use SequenceMatcher to calculate similarity + from difflib import SequenceMatcher + + best_score = 0 + if reference_texts: + for ref_text in reference_texts: + # Calculate similarity ratio using SequenceMatcher + similarity = SequenceMatcher(None, text, ref_text).ratio() + print(f"Similarity between {text} and {ref_text}: {similarity}") + + # Update best score if this one is higher + if similarity > best_score: + best_score = similarity + + return best_score + + def _filter_ocr_results(self, ocr_results: list, reference_texts: List[str]) -> list: + """ + Filter OCR results based on similarity to output texts + + Args: + ocr_results: List of OCR results + reference_texts: List of reference texts + + Returns: + List of filtered OCR results + """ + filtered_results = [] + for result in ocr_results: + # Simple similarity function - can be replaced with more sophisticated methods + similarity = self._calculate_similarity(result['text'], reference_texts) + if similarity >= 0.9: + filtered_results.append(result) + + return filtered_results + + + async def aapply( + self, + input: InternalDataFrame, + runtime: AsyncRuntime, + ) -> InternalDataFrame: + + with json_schema_to_pydantic(self.field_schema) as ResponseModel: + # special handling to flag image inputs if they exist + input_field_types = defaultdict(lambda: MessageChunkType.TEXT) + for tag in self.image_tags: + # these are the project variable names, NOT the label config tag names. TODO: pass this info from LSE to avoid recomputing it here. + variables = extract_variable_name(tag.value) + if len(variables) != 1: + logger.warning( + f"Image tag {tag.name} has multiple variables: {variables}. Cannot mark these variables as image inputs." + ) + continue + input_field_types[variables[0]] = ( + MessageChunkType.IMAGE_URLS + if tag.attr.get("valueList") + else MessageChunkType.IMAGE_URL + ) + + logger.debug( + f"Using VisionRuntime with input field types: {input_field_types}" + ) + output = await runtime.batch_to_batch( + input, + input_template=self.input_template, + output_template="", + instructions_template=self.instructions, + response_model=ResponseModel, + input_field_types=input_field_types, + ) + output['label'] = await self.process_images_with_ocr(input['image'].tolist()) + + # Convert OCR results to a format that can be used for similarity matching + # Process each row individually + # Process each row to filter OCR results based on similarity to reference texts + filtered_labels = [] + for i, row in output.iterrows(): + # Filter OCR results based on similarity to reference texts + filtered_ocr = self._filter_ocr_results(row['label'], row['output']) + filtered_labels.append(filtered_ocr) + output['label'] = filtered_labels + return output + + +if __name__ == "__main__": + images = [ + "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0000.png", + # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0001.png", + # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0002.png", + # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0003.png", + # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0004.png", + # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0005.png", + # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0006.png", + # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0007.png", + # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0008.png" + ] + results = asyncio.run(LabelStudioSkillImageOCR.process_images_with_ocr(images)) + print(results) From aa45c0a270e5000744e785167934dceb6141d583 Mon Sep 17 00:00:00 2001 From: nik Date: Thu, 6 Mar 2025 00:00:49 +0100 Subject: [PATCH 02/10] Add per-region textarea support --- .../collection/label_studio_image_ocr.py | 103 ++++++++++++------ 1 file changed, 71 insertions(+), 32 deletions(-) diff --git a/adala/skills/collection/label_studio_image_ocr.py b/adala/skills/collection/label_studio_image_ocr.py index 1171eb8b..6d6f7b50 100644 --- a/adala/skills/collection/label_studio_image_ocr.py +++ b/adala/skills/collection/label_studio_image_ocr.py @@ -1,7 +1,7 @@ import re import logging import pandas as pd -from typing import List, Optional, Type +from typing import List, Optional, Type, Dict, Tuple from functools import cached_property from copy import deepcopy from collections import defaultdict @@ -208,8 +208,13 @@ async def process_single_image(image_data): tasks = [process_single_image(image) for image in images] results = await asyncio.gather(*tasks) + return results + + def _convert_ocr_results_to_label_studio_format(self, results: list) -> Tuple[List, List]: + # normalize EasyOCR results to RectangleLabels bounding boxes format of Label Studio - normalized_results = [] + all_bbox_annotations = [] + all_text_annotations = [] for result in results: if not result: continue @@ -222,7 +227,8 @@ async def process_single_image(image_data): original_height = result.get('image_height', 1000) # Convert to Label Studio format - label_studio_results = [] + bbox_annotations = [] + text_annotations = [] for i, (bbox, text, score) in enumerate(zip(bboxes, texts, scores)): # EasyOCR bboxes format is [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] @@ -251,24 +257,41 @@ async def process_single_image(image_data): # generate unique id for the annotation id_gen = str(uuid.uuid4())[:8] # Create Label Studio format annotation - annotation = { + bbox_annotation = { "x": x_percent, "y": y_percent, "width": width_percent, "height": height_percent, - "rectanglelabels": ['Transcription'], # TODO: customize - "score": score, - "text": text + "rotation": 0, + "id": id_gen, + # "rectanglelabels": ['Transcription'], # TODO: customize + # "score": score, + # "text": text + } + text_annotation = { + 'text': [text], + 'id': id_gen } + # annotation = { + # "x": x_percent, + # "y": y_percent, + # "width": width_percent, + # "height": height_percent, + # "rectanglelabels": ['Transcription'], # TODO: customize + # "score": score, + # "text": text + # } - label_studio_results.append(annotation) + bbox_annotations.append(bbox_annotation) + text_annotations.append(text_annotation) # Replace the OCR result with the Label Studio formatted result - normalized_results.append(label_studio_results) + all_bbox_annotations.append(bbox_annotations) + all_text_annotations.append(text_annotations) - return normalized_results + return all_bbox_annotations, all_text_annotations - def _calculate_similarity(self, text: str, reference_texts: List[str]) -> float: + def _calculate_similarity(self, text: str, reference_texts: List[str]) -> Tuple[float, str]: """ Calculate similarity between a text and a list of reference texts @@ -277,29 +300,29 @@ def _calculate_similarity(self, text: str, reference_texts: List[str]) -> float: reference_texts: List of reference texts Returns: - Similarity score between 0 and 1 + Similarity score between 0 and 1 and the best matching text """ # Convert to lowercase for case-insensitive comparison text = text.lower() - reference_texts = [ref_text.lower() for ref_text in reference_texts] # Use SequenceMatcher to calculate similarity from difflib import SequenceMatcher best_score = 0 + best_match = None if reference_texts: for ref_text in reference_texts: # Calculate similarity ratio using SequenceMatcher - similarity = SequenceMatcher(None, text, ref_text).ratio() + similarity = SequenceMatcher(None, text, ref_text.lower()).ratio() print(f"Similarity between {text} and {ref_text}: {similarity}") # Update best score if this one is higher if similarity > best_score: best_score = similarity - - return best_score + best_match = ref_text + return best_score, best_match - def _filter_ocr_results(self, ocr_results: list, reference_texts: List[str]) -> list: + def _filter_ocr_results(self, ocr_results: Dict, reference_texts: List[str]) -> list: """ Filter OCR results based on similarity to output texts @@ -310,12 +333,18 @@ def _filter_ocr_results(self, ocr_results: list, reference_texts: List[str]) -> Returns: List of filtered OCR results """ - filtered_results = [] - for result in ocr_results: + filtered_results = { + 'bboxes': [], + 'texts': [], + 'scores': [] + } + for bbox, text, score in zip(ocr_results['bboxes'], ocr_results['texts'], ocr_results['scores']): # Simple similarity function - can be replaced with more sophisticated methods - similarity = self._calculate_similarity(result['text'], reference_texts) + similarity, best_match = self._calculate_similarity(text, reference_texts) if similarity >= 0.9: - filtered_results.append(result) + filtered_results['bboxes'].append(bbox) + filtered_results['texts'].append(best_match) + filtered_results['scores'].append(score) return filtered_results @@ -329,6 +358,7 @@ async def aapply( with json_schema_to_pydantic(self.field_schema) as ResponseModel: # special handling to flag image inputs if they exist input_field_types = defaultdict(lambda: MessageChunkType.TEXT) + image_value_key = None for tag in self.image_tags: # these are the project variable names, NOT the label config tag names. TODO: pass this info from LSE to avoid recomputing it here. variables = extract_variable_name(tag.value) @@ -337,7 +367,8 @@ async def aapply( f"Image tag {tag.name} has multiple variables: {variables}. Cannot mark these variables as image inputs." ) continue - input_field_types[variables[0]] = ( + image_value_key = variables[0] + input_field_types[image_value_key] = ( MessageChunkType.IMAGE_URLS if tag.attr.get("valueList") else MessageChunkType.IMAGE_URL @@ -354,17 +385,25 @@ async def aapply( response_model=ResponseModel, input_field_types=input_field_types, ) - output['label'] = await self.process_images_with_ocr(input['image'].tolist()) - - # Convert OCR results to a format that can be used for similarity matching - # Process each row individually - # Process each row to filter OCR results based on similarity to reference texts - filtered_labels = [] + print(f'Output: {output}') + print(f'Process images with OCR: {input[image_value_key].tolist()}') + ocr_results = await self.process_images_with_ocr(input[image_value_key].tolist()) + filtered_ocr_results = [] for i, row in output.iterrows(): - # Filter OCR results based on similarity to reference texts - filtered_ocr = self._filter_ocr_results(row['label'], row['output']) - filtered_labels.append(filtered_ocr) - output['label'] = filtered_labels + extracted_result = row['output'] + filtered_ocr_result = { + 'image_width': ocr_results[i]['image_width'], + 'image_height': ocr_results[i]['image_height'], + 'ocr_data': self._filter_ocr_results(ocr_results[i]['ocr_data'], extracted_result) + } + filtered_ocr_results.append(filtered_ocr_result) + print(f'Filtered OCR results: {filtered_ocr_results}') + # convert filtered OCR results to Label Studio format + bbox_annotations, text_annotations = self._convert_ocr_results_to_label_studio_format(filtered_ocr_results) + print(f'Bbox annotations: {bbox_annotations}') + print(f'Text annotations: {text_annotations}') + output['bbox'] = bbox_annotations + output['transcription'] = text_annotations return output From b00ee19820ccdd5bd27c8d9f031a51c26f253153 Mon Sep 17 00:00:00 2001 From: niklub Date: Thu, 6 Mar 2025 08:18:20 +0000 Subject: [PATCH 03/10] Sync Follow Merge dependencies Workflow run: https://github.com/HumanSignal/Adala/actions/runs/13694164225 --- poetry.lock | 6 +++--- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9580fe2a..e23593b7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3696,7 +3696,7 @@ optional = false python-versions = ">=3.9,<4" groups = ["main"] files = [ - {file = "4b2f1d9b3befa4a546907321912055a43a408d17.zip", hash = "sha256:4fb8066f51597a7b0b3687527140f3bf8a7259d4e93a9c3de0920f0ef9c4e0ff"}, + {file = "9bed76c758ac7ffc312c62634d0e9cd873ea5084.zip", hash = "sha256:3437c38a449da104ee01eaa5f22c27206cc93d4d02657574be9c065ad8df4134"}, ] [package.dependencies] @@ -3722,7 +3722,7 @@ xmljson = "0.2.1" [package.source] type = "url" -url = "https://github.com/HumanSignal/label-studio-sdk/archive/4b2f1d9b3befa4a546907321912055a43a408d17.zip" +url = "https://github.com/HumanSignal/label-studio-sdk/archive/9bed76c758ac7ffc312c62634d0e9cd873ea5084.zip" [[package]] name = "litellm" @@ -8487,4 +8487,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.9.7 || >3.9.7,<3.13" -content-hash = "c5fdce848262a8d86c0ef4f385c95754a408d24077294127ed3294f66b77c66c" +content-hash = "79044b88e569b6564f33b5d1eb1a4b43213d98e7778edbf8dfaafdc78666bf99" diff --git a/pyproject.toml b/pyproject.toml index 0d24e767..b09800a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ celery = {version = "^5.3.6", extras = ["redis"]} kombu = ">=5.4.0rc2" # Pin version to fix https://github.com/celery/celery/issues/8030. TODO: remove when this fix will be included in celery uvicorn = "*" pydantic-settings = "^2.2.1" -label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/4b2f1d9b3befa4a546907321912055a43a408d17.zip"} +label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/9bed76c758ac7ffc312c62634d0e9cd873ea5084.zip"} kafka-python-ng = "^2.2.3" requests = "^2.32.0" # Using litellm from forked repo until vertex fix is released: https://github.com/BerriAI/litellm/issues/7904 From 0b626a7507cf796007c5fb50c78ee8b1799a6fef Mon Sep 17 00:00:00 2001 From: niklub Date: Mon, 10 Mar 2025 22:19:54 +0000 Subject: [PATCH 04/10] Sync Follow Merge dependencies Workflow run: https://github.com/HumanSignal/Adala/actions/runs/13775885307 --- poetry.lock | 164 ++++++++++++++++++++++--------------------------- pyproject.toml | 2 +- 2 files changed, 73 insertions(+), 93 deletions(-) diff --git a/poetry.lock b/poetry.lock index 629690e5..ef7d66bb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -123,7 +123,7 @@ multidict = ">=4.5,<7.0" yarl = ">=1.12.0,<2.0" [package.extras] -speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] +speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] [[package]] name = "aiokafka" @@ -206,8 +206,8 @@ files = [ ] [package.extras] -dev = ["aiounittest (==1.4.1)", "attribution (==1.6.2)", "black (==23.3.0)", "coverage[toml] (==7.2.3)", "flake8 (==5.0.4)", "flake8-bugbear (==23.3.12)", "flit (==3.7.1)", "mypy (==1.2.0)", "ufmt (==2.1.0)", "usort (==1.0.6)"] -docs = ["sphinx (==6.1.3)", "sphinx-mdinclude (==0.5.3)"] +dev = ["aiounittest (==1.4.1) ; python_version < \"3.8\"", "attribution (==1.6.2)", "black (==23.3.0)", "coverage[toml] (==7.2.3)", "flake8 (==5.0.4)", "flake8-bugbear (==23.3.12)", "flit (==3.7.1)", "mypy (==1.2.0)", "ufmt (==2.1.0)", "usort (==1.0.6)"] +docs = ["sphinx (==6.1.3) ; python_version >= \"3.8\"", "sphinx-mdinclude (==0.5.3)"] [[package]] name = "amqp" @@ -256,7 +256,7 @@ typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} [package.extras] doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1)"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""] trio = ["trio (>=0.26.1)"] [[package]] @@ -412,8 +412,8 @@ files = [ six = ">=1.12.0" [package.extras] -astroid = ["astroid (>=1,<2)", "astroid (>=2,<4)"] -test = ["astroid (>=1,<2)", "astroid (>=2,<4)", "pytest"] +astroid = ["astroid (>=1,<2) ; python_version < \"3\"", "astroid (>=2,<4) ; python_version >= \"3\""] +test = ["astroid (>=1,<2) ; python_version < \"3\"", "astroid (>=2,<4) ; python_version >= \"3\"", "pytest"] [[package]] name = "async-lru" @@ -455,12 +455,12 @@ files = [ ] [package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\""] [[package]] name = "babel" @@ -607,7 +607,7 @@ typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"pypy\"", "aiohttp (>=3.7.4,!=3.9.0) ; sys_platform == \"win32\" and implementation_name == \"pypy\""] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -694,7 +694,7 @@ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} [package.extras] docs = ["furo (>=2023.08.17)", "sphinx (>=7.0,<8.0)", "sphinx-argparse-cli (>=1.5)", "sphinx-autodoc-typehints (>=1.10)", "sphinx-issues (>=3.0.0)"] -test = ["build[uv,virtualenv]", "filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0)", "setuptools (>=56.0.0)", "setuptools (>=56.0.0)", "setuptools (>=67.8.0)", "wheel (>=0.36.0)"] +test = ["build[uv,virtualenv]", "filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0) ; python_version < \"3.10\"", "setuptools (>=56.0.0) ; python_version == \"3.10\"", "setuptools (>=56.0.0) ; python_version == \"3.11\"", "setuptools (>=67.8.0) ; python_version >= \"3.12\"", "wheel (>=0.36.0)"] typing = ["build[uv]", "importlib-metadata (>=5.1)", "mypy (>=1.9.0,<1.10.0)", "tomli", "typing-extensions (>=3.7.4.3)"] uv = ["uv (>=0.1.18)"] virtualenv = ["virtualenv (>=20.0.35)"] @@ -740,32 +740,32 @@ vine = ">=5.1.0,<6.0" arangodb = ["pyArango (>=2.0.2)"] auth = ["cryptography (==42.0.5)"] azureblockblob = ["azure-storage-blob (>=12.15.0)"] -brotli = ["brotli (>=1.0.0)", "brotlipy (>=0.7.0)"] +brotli = ["brotli (>=1.0.0) ; platform_python_implementation == \"CPython\"", "brotlipy (>=0.7.0) ; platform_python_implementation == \"PyPy\""] cassandra = ["cassandra-driver (>=3.25.0,<4)"] consul = ["python-consul2 (==0.1.5)"] cosmosdbsql = ["pydocumentdb (==2.3.5)"] -couchbase = ["couchbase (>=3.0.0)"] +couchbase = ["couchbase (>=3.0.0) ; platform_python_implementation != \"PyPy\" and (platform_system != \"Windows\" or python_version < \"3.10\")"] couchdb = ["pycouchdb (==1.14.2)"] django = ["Django (>=2.2.28)"] dynamodb = ["boto3 (>=1.26.143)"] elasticsearch = ["elastic-transport (<=8.13.0)", "elasticsearch (<=8.13.0)"] -eventlet = ["eventlet (>=0.32.0)"] +eventlet = ["eventlet (>=0.32.0) ; python_version < \"3.10\""] gcs = ["google-cloud-storage (>=2.10.0)"] gevent = ["gevent (>=1.5.0)"] -librabbitmq = ["librabbitmq (>=2.0.0)"] -memcache = ["pylibmc (==1.6.3)"] +librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] +memcache = ["pylibmc (==1.6.3) ; platform_system != \"Windows\""] mongodb = ["pymongo[srv] (>=4.0.2)"] msgpack = ["msgpack (==1.0.8)"] pymemcache = ["python-memcached (>=1.61)"] -pyro = ["pyro4 (==4.82)"] +pyro = ["pyro4 (==4.82) ; python_version < \"3.11\""] pytest = ["pytest-celery[all] (>=1.0.0)"] redis = ["redis (>=4.5.2,!=4.5.5,<6.0.0)"] s3 = ["boto3 (>=1.26.143)"] slmq = ["softlayer-messaging (>=1.0.3)"] -solar = ["ephem (==4.1.5)"] +solar = ["ephem (==4.1.5) ; platform_python_implementation != \"PyPy\""] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] -sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.4)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] -tblib = ["tblib (>=1.3.0)", "tblib (>=1.5.0)"] +sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.4)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"] +tblib = ["tblib (>=1.3.0) ; python_version < \"3.8.0\"", "tblib (>=1.5.0) ; python_version >= \"3.8.0\""] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=1.3.1)"] zstd = ["zstandard (==0.22.0)"] @@ -1208,10 +1208,10 @@ files = [ ] [package.extras] -avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests"] -dev = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "flake8", "pytest", "pytest (==4.6.4)", "pytest-timeout", "requests"] -doc = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests", "sphinx", "sphinx-rtd-theme"] -json = ["jsonschema", "pyrsistent", "pyrsistent (==0.16.1)", "requests"] +avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "requests"] +dev = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "flake8", "pytest (==4.6.4) ; python_version < \"3.0\"", "pytest ; python_version >= \"3.0\"", "pytest-timeout", "requests"] +doc = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "requests", "sphinx", "sphinx-rtd-theme"] +json = ["jsonschema", "pyrsistent (==0.16.1) ; python_version < \"3.0\"", "pyrsistent ; python_version > \"3.0\"", "requests"] protobuf = ["protobuf", "requests"] schema-registry = ["requests"] @@ -1386,7 +1386,7 @@ files = [ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli"] +toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "cryptography" @@ -1476,8 +1476,8 @@ jinja2 = ">=2.10.1,<4.0" packaging = "*" pydantic = [ {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""}, - {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, + {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version == \"3.10\""}, + {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"4.0\""}, {version = ">=1.10.0,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.12\" and python_version < \"4.0\""}, ] pyyaml = ">=6.0.1" @@ -1522,7 +1522,7 @@ xxhash = "*" apache-beam = ["apache-beam (>=2.26.0)"] audio = ["librosa", "soundfile (>=0.12.1)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] @@ -1530,7 +1530,7 @@ quality = ["ruff (>=0.3.0)"] s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=6.2.1)"] @@ -1738,7 +1738,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version < \"3.11\"" +markers = "python_version <= \"3.10\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -1760,7 +1760,7 @@ files = [ ] [package.extras] -tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] +tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""] [[package]] name = "faker" @@ -1853,7 +1853,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] -typing = ["typing-extensions (>=4.12.2)"] +typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] [[package]] name = "flatbuffers" @@ -1945,18 +1945,18 @@ files = [ ] [package.extras] -all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +all = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\"", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0) ; python_version <= \"3.12\"", "xattr ; sys_platform == \"darwin\"", "zopfli (>=0.1.4)"] graphite = ["lz4 (>=1.7.4.2)"] -interpolatable = ["munkres", "pycairo", "scipy"] +interpolatable = ["munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\""] lxml = ["lxml (>=4.0)"] pathops = ["skia-pathops (>=0.5.0)"] plot = ["matplotlib"] repacker = ["uharfbuzz (>=0.23.0)"] symfont = ["sympy"] -type1 = ["xattr"] +type1 = ["xattr ; sys_platform == \"darwin\""] ufo = ["fs (>=2.2.0,<3)"] -unicode = ["unicodedata2 (>=15.1.0)"] -woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] +unicode = ["unicodedata2 (>=15.1.0) ; python_version <= \"3.12\""] +woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"] [[package]] name = "fqdn" @@ -2531,7 +2531,7 @@ idna = "*" sniffio = "*" [package.extras] -brotli = ["brotli", "brotlicffi"] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -2740,7 +2740,7 @@ zipp = ">=0.5" [package.extras] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] +test = ["flufl.flake8", "importlib-resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] [[package]] name = "importlib-resources" @@ -2759,7 +2759,7 @@ markers = {dev = "python_version < \"3.10\""} zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] @@ -2780,7 +2780,7 @@ files = [ [package.extras] docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"] -testing = ["pygments", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] +testing = ["pygments", "pytest (>=6)", "pytest-black (>=0.3.7) ; platform_python_implementation != \"PyPy\"", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1) ; platform_python_implementation != \"PyPy\""] [[package]] name = "iniconfig" @@ -3218,7 +3218,7 @@ traitlets = ">=5.3" [package.extras] docs = ["ipykernel", "myst-parser", "pydata-sphinx-theme", "sphinx (>=4)", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] +test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko ; sys_platform == \"win32\"", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] [[package]] name = "jupyter-console" @@ -3648,7 +3648,7 @@ azureservicebus = ["azure-servicebus (>=7.10.0)"] azurestoragequeues = ["azure-identity (>=1.12.0)", "azure-storage-queue (>=12.6.0)"] confluentkafka = ["confluent-kafka (>=2.2.0)"] consul = ["python-consul2 (==0.1.5)"] -librabbitmq = ["librabbitmq (>=2.0.0)"] +librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] mongodb = ["pymongo (>=4.1.1)"] msgpack = ["msgpack (==1.1.0)"] pyro = ["pyro4 (==4.82)"] @@ -3656,7 +3656,7 @@ qpid = ["qpid-python (>=0.26)", "qpid-tools (>=0.26)"] redis = ["redis (>=4.5.2,!=4.5.5,!=5.0.2)"] slmq = ["softlayer-messaging (>=1.0.3)"] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] -sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] +sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=2.8.0)"] @@ -3696,7 +3696,7 @@ optional = false python-versions = ">=3.9,<4" groups = ["main"] files = [ - {file = "9bed76c758ac7ffc312c62634d0e9cd873ea5084.zip", hash = "sha256:3437c38a449da104ee01eaa5f22c27206cc93d4d02657574be9c065ad8df4134"}, + {file = "5cc1d11e0afbaae6d597c1cbe12805b02af82aac.zip", hash = "sha256:99362c76b98c84e3d24aa6b194768e92c27788216c97663036dcf661c69eaf0b"}, ] [package.dependencies] @@ -3722,7 +3722,7 @@ xmljson = "0.2.1" [package.source] type = "url" -url = "https://github.com/HumanSignal/label-studio-sdk/archive/9bed76c758ac7ffc312c62634d0e9cd873ea5084.zip" +url = "https://github.com/HumanSignal/label-studio-sdk/archive/5cc1d11e0afbaae6d597c1cbe12805b02af82aac.zip" [[package]] name = "litellm" @@ -4191,7 +4191,7 @@ watchdog = ">=2.0" [package.extras] i18n = ["babel (>=2.9.0)"] -min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4) ; platform_system == \"Windows\"", "ghp-import (==1.0)", "importlib-metadata (==4.4) ; python_version < \"3.10\"", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] [[package]] name = "mkdocs-autorefs" @@ -4508,7 +4508,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] +gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] [[package]] @@ -4529,7 +4529,7 @@ PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.14,<0.18)", "pymsalruntime (>=0.17,<0.18)"] +broker = ["pymsalruntime (>=0.14,<0.18) ; python_version >= \"3.6\" and platform_system == \"Windows\"", "pymsalruntime (>=0.17,<0.18) ; python_version >= \"3.8\" and platform_system == \"Darwin\""] [[package]] name = "multidict" @@ -4817,7 +4817,7 @@ tornado = ">=6.2.0" [package.extras] dev = ["hatch", "pre-commit"] docs = ["myst-parser", "nbsphinx", "pydata-sphinx-theme", "sphinx (>=1.3.6)", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["importlib-resources (>=5.0)", "ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[test] (>=2.27.1,<3)", "nbval", "pytest (>=7.0)", "pytest-console-scripts", "pytest-timeout", "pytest-tornasync", "requests"] +test = ["importlib-resources (>=5.0) ; python_version < \"3.10\"", "ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[test] (>=2.27.1,<3)", "nbval", "pytest (>=7.0)", "pytest-console-scripts", "pytest-timeout", "pytest-tornasync", "requests"] [[package]] name = "notebook-shim" @@ -5524,7 +5524,7 @@ docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions"] +typing = ["typing-extensions ; python_version < \"3.10\""] xmp = ["defusedxml"] [[package]] @@ -5663,7 +5663,7 @@ files = [ ] [package.extras] -test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] +test = ["enum34 ; python_version <= \"3.4\"", "ipaddress ; python_version < \"3.0\"", "mock ; python_version < \"3.0\"", "pywin32 ; sys_platform == \"win32\"", "wmi ; sys_platform == \"win32\""] [[package]] name = "ptyprocess" @@ -5817,7 +5817,7 @@ typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata"] +timezone = ["tzdata ; python_version >= \"3.9\" and sys_platform == \"win32\""] [[package]] name = "pydantic-core" @@ -6992,9 +6992,9 @@ files = [ ] [package.extras] -nativelib = ["pyobjc-framework-Cocoa", "pywin32"] -objc = ["pyobjc-framework-Cocoa"] -win32 = ["pywin32"] +nativelib = ["pyobjc-framework-Cocoa ; sys_platform == \"darwin\"", "pywin32 ; sys_platform == \"win32\""] +objc = ["pyobjc-framework-Cocoa ; sys_platform == \"darwin\""] +win32 = ["pywin32 ; sys_platform == \"win32\""] [[package]] name = "setuptools" @@ -7009,9 +7009,9 @@ files = [ ] [package.extras] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6) ; python_version < \"3.10\"", "importlib-resources (>=5.10.2) ; python_version < \"3.9\"", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-ruff (<0.4) ; platform_system == \"Windows\"", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "pytest-ruff (>=0.3.2) ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "shellingham" @@ -7396,7 +7396,7 @@ files = [ {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, ] -markers = {main = "python_version < \"3.11\""} +markers = {main = "python_version <= \"3.10\""} [[package]] name = "tomli" @@ -7405,7 +7405,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version < \"3.11\"" +markers = "python_version <= \"3.10\"" files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, @@ -7692,36 +7692,16 @@ description = "HTTP library with thread-safe connection pooling, file post, and optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" groups = ["main", "dev"] -markers = "platform_python_implementation == \"PyPy\" or python_version < \"3.10\"" files = [ {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, ] [package.extras] -brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] -[[package]] -name = "urllib3" -version = "2.2.3" -description = "HTTP library with thread-safe connection pooling, file post, and more." -optional = false -python-versions = ">=3.8" -groups = ["main", "dev"] -markers = "python_version >= \"3.10\" and platform_python_implementation != \"PyPy\"" -files = [ - {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, - {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, -] - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - [[package]] name = "uvicorn" version = "0.30.6" @@ -7742,12 +7722,12 @@ httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standar python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} [package.extras] -standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] [[package]] name = "uvloop" @@ -7756,7 +7736,7 @@ description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.8.0" groups = ["main"] -markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\"" +markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"" files = [ {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9ebafa0b96c62881d5cafa02d9da2e44c23f9f0cd829f3a32a6aff771449c996"}, {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:35968fc697b0527a06e134999eef859b4034b37aebca537daeb598b9d45a137b"}, @@ -7793,7 +7773,7 @@ files = [ [package.extras] docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] +test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0) ; python_version >= \"3.12\"", "aiohttp (>=3.8.1) ; python_version < \"3.12\"", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] [[package]] name = "vcrpy" @@ -8477,14 +8457,14 @@ files = [ markers = {dev = "python_version < \"3.10\""} [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] -test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.9.7 || >3.9.7,<3.13" -content-hash = "79044b88e569b6564f33b5d1eb1a4b43213d98e7778edbf8dfaafdc78666bf99" +content-hash = "5bb9e01fd3caa4e768a907809651e0477d9416971ef63988323bfdc8f0302325" diff --git a/pyproject.toml b/pyproject.toml index f758c590..0e6b7653 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ celery = {version = "^5.3.6", extras = ["redis"]} kombu = ">=5.4.0rc2" # Pin version to fix https://github.com/celery/celery/issues/8030. TODO: remove when this fix will be included in celery uvicorn = "*" pydantic-settings = "^2.2.1" -label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/9bed76c758ac7ffc312c62634d0e9cd873ea5084.zip"} +label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/5cc1d11e0afbaae6d597c1cbe12805b02af82aac.zip"} kafka-python-ng = "^2.2.3" requests = "^2.32.0" # Using litellm from forked repo until vertex fix is released: https://github.com/BerriAI/litellm/issues/7904 From d39a5dc825ef01ac366f8a6238f8100204cd8552 Mon Sep 17 00:00:00 2001 From: nik Date: Mon, 10 Mar 2025 22:22:33 +0000 Subject: [PATCH 05/10] Lock poetry --- .../collection/label_studio_image_ocr.py | 264 +++++++++++++----- poetry.lock | 263 ++++++++++++----- pyproject.toml | 1 + 3 files changed, 389 insertions(+), 139 deletions(-) diff --git a/adala/skills/collection/label_studio_image_ocr.py b/adala/skills/collection/label_studio_image_ocr.py index 6d6f7b50..fd41c7cf 100644 --- a/adala/skills/collection/label_studio_image_ocr.py +++ b/adala/skills/collection/label_studio_image_ocr.py @@ -9,6 +9,7 @@ import base64 import asyncio import io +from thefuzz import fuzz from PIL import Image from urllib.parse import urlparse import uuid @@ -210,6 +211,34 @@ async def process_single_image(image_data): return results + def _get_normalized_bbox(self, bbox: List[List[int]], original_width: int, original_height: int) -> List[float]: + # Calculate top-left corner (minimum x and y) + # bbox format is [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] + x_values = [point[0] for point in bbox] + y_values = [point[1] for point in bbox] + + min_x = min(x_values) + min_y = min(y_values) + + # Calculate width and height + max_x = max(x_values) + max_y = max(y_values) + width = max_x - min_x + height = max_y - min_y + + # Convert to percentages + x_percent = (min_x / original_width) * 100 + y_percent = (min_y / original_height) * 100 + width_percent = (width / original_width) * 100 + height_percent = (height / original_height) * 100 + + return { + 'x': x_percent, + 'y': y_percent, + 'width': width_percent, + 'height': height_percent + } + def _convert_ocr_results_to_label_studio_format(self, results: list) -> Tuple[List, List]: # normalize EasyOCR results to RectangleLabels bounding boxes format of Label Studio @@ -235,52 +264,18 @@ def _convert_ocr_results_to_label_studio_format(self, results: list) -> Tuple[Li # We need to convert to Label Studio format with x,y as top-left corner # and width, height as percentages - # Calculate top-left corner (minimum x and y) - x_values = [point[0] for point in bbox] - y_values = [point[1] for point in bbox] - - min_x = min(x_values) - min_y = min(y_values) - - # Calculate width and height - max_x = max(x_values) - max_y = max(y_values) - width = max_x - min_x - height = max_y - min_y - - # Convert to percentages - x_percent = (min_x / original_width) * 100 - y_percent = (min_y / original_height) * 100 - width_percent = (width / original_width) * 100 - height_percent = (height / original_height) * 100 + bbox_annotation = self._get_normalized_bbox(bbox, original_width, original_height) # generate unique id for the annotation id_gen = str(uuid.uuid4())[:8] # Create Label Studio format annotation - bbox_annotation = { - "x": x_percent, - "y": y_percent, - "width": width_percent, - "height": height_percent, - "rotation": 0, - "id": id_gen, - # "rectanglelabels": ['Transcription'], # TODO: customize - # "score": score, - # "text": text - } + bbox_annotation['id'] = id_gen + bbox_annotation['rotation'] = 0 + text_annotation = { 'text': [text], 'id': id_gen } - # annotation = { - # "x": x_percent, - # "y": y_percent, - # "width": width_percent, - # "height": height_percent, - # "rectanglelabels": ['Transcription'], # TODO: customize - # "score": score, - # "text": text - # } bbox_annotations.append(bbox_annotation) text_annotations.append(text_annotation) @@ -291,9 +286,65 @@ def _convert_ocr_results_to_label_studio_format(self, results: list) -> Tuple[Li return all_bbox_annotations, all_text_annotations - def _calculate_similarity(self, text: str, reference_texts: List[str]) -> Tuple[float, str]: + def _convert_ocr_results_to_label_studio_format_v2(self, results: list) -> Tuple[List, List]: + """ + Same as _convert_ocr_results_to_label_studio_format, but uses a different approach to filter the OCR results: + results['ocr_data'] contains a dictionary of reference texts as keys and lists of OCR results as values. + For each reference text, we create a group of `bbox_annotations` and `text_annotations`. + In text_annotations, we place "parentID" as the id of the bbox_annotation that it belongs to (pick the first text_annotation id as parentID) """ - Calculate similarity between a text and a list of reference texts + all_bbox_annotations = [] + all_text_annotations = [] + + for result in results: + bbox_annotations = [] + text_annotations = [] + + original_width = result['image_width'] + original_height = result['image_height'] + + # Process each reference text and its associated OCR results + for reference_text, ocr_matches in result['ocr_data'].items(): + # Create a group for this reference text + group_id = None + + # Process each OCR match for this reference text + for bbox, text, score in zip(ocr_matches['bboxes'], ocr_matches['texts'], ocr_matches['scores']): + + bbox_annotation = self._get_normalized_bbox(bbox, original_width, original_height) + + # Generate unique id for the annotation + id_gen = str(uuid.uuid4())[:8] + if group_id is None: + group_id = id_gen + + # Create bbox annotation + bbox_annotation['rotation'] = 0 + bbox_annotation['id'] = id_gen + bbox_annotation['score'] = score + # Create text annotation + text_annotation = { + 'text': [text], + 'id': id_gen, + } + if group_id != id_gen: + text_annotation['parent_id'] = group_id + bbox_annotation['parent_id'] = group_id + + bbox_annotations.append(bbox_annotation) + text_annotations.append(text_annotation) + + # Add annotations for this result to the overall lists + all_bbox_annotations.append(bbox_annotations) + all_text_annotations.append(text_annotations) + + return all_bbox_annotations, all_text_annotations + + + @classmethod + def _calculate_similarity(cls, text: str, reference_texts: List[str]) -> Tuple[float, str]: + """ + Calculate similarity between a text and substrings within reference texts. Args: text: The text to compare @@ -304,22 +355,21 @@ def _calculate_similarity(self, text: str, reference_texts: List[str]) -> Tuple[ """ # Convert to lowercase for case-insensitive comparison text = text.lower() - - # Use SequenceMatcher to calculate similarity - from difflib import SequenceMatcher + text_len = len(text) best_score = 0 best_match = None + if reference_texts: for ref_text in reference_texts: - # Calculate similarity ratio using SequenceMatcher - similarity = SequenceMatcher(None, text, ref_text.lower()).ratio() - print(f"Similarity between {text} and {ref_text}: {similarity}") - - # Update best score if this one is higher - if similarity > best_score: - best_score = similarity + ref_text_lower = ref_text.lower() + best_window_score = fuzz.partial_ratio(text, ref_text_lower) + if best_window_score > best_score: + best_score = best_window_score best_match = ref_text + best_score = float(best_score) / 100 + print(f"Best substring similarity between '{text}' and '{best_match}': {best_score}") + return best_score, best_match def _filter_ocr_results(self, ocr_results: Dict, reference_texts: List[str]) -> list: @@ -347,6 +397,99 @@ def _filter_ocr_results(self, ocr_results: Dict, reference_texts: List[str]) -> filtered_results['scores'].append(score) return filtered_results + + def _filter_ocr_results_v2(self, ocr_results: Dict, reference_texts: List[str]) -> Dict[str, List]: + + output = {} + for ref_text in reference_texts: + ref_text_lower = ref_text.lower() + output[ref_text] = { + 'bboxes': [], + 'texts': [], + 'scores': [] + } + for text, score, bbox in zip(ocr_results['texts'], ocr_results['scores'], ocr_results['bboxes']): + text_lower = text.lower() + # check if text is a fuzzy substring of ref_text + similarity = fuzz.partial_ratio(text_lower, ref_text_lower) + if similarity >= 95: + output[ref_text]['bboxes'].append(bbox) + output[ref_text]['texts'].append(text) + output[ref_text]['scores'].append(score) + + # Filter to keep only horizontally aligned bounding boxes + for ref_text in output: + if not output[ref_text]['bboxes']: + continue + + # Group bounding boxes by their vertical position (y-coordinate) + # Using the middle y-coordinate of each box for grouping + y_groups = {} + for i, bbox in enumerate(output[ref_text]['bboxes']): + # Calculate middle y-coordinate of the bounding box + # bbox format is [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] + # Calculate middle y-coordinate of the bounding box + # bbox format is [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] + y_values = [point[1] for point in bbox] + mid_y = sum(y_values) / len(y_values) + + # Group with tolerance of 10 pixels + group_key = int(mid_y / 10) * 10 + if group_key not in y_groups: + y_groups[group_key] = [] + y_groups[group_key].append(i) + + # Find the group with the maximum number of bounding boxes + max_group_key = max(y_groups.keys(), key=lambda k: len(y_groups[k]), default=None) + + if max_group_key is not None: + # Keep only the bounding boxes in the largest horizontal group + indices_to_keep = y_groups[max_group_key] + + # Create new filtered lists + filtered_bboxes = [output[ref_text]['bboxes'][i] for i in indices_to_keep] + filtered_texts = [output[ref_text]['texts'][i] for i in indices_to_keep] + filtered_scores = [output[ref_text]['scores'][i] for i in indices_to_keep] + + # Sort bounding boxes by x-coordinate to maintain reading order + sorted_indices = sorted(range(len(filtered_bboxes)), + key=lambda i: min(point[0] for point in filtered_bboxes[i])) + + filtered_bboxes = [filtered_bboxes[i] for i in sorted_indices] + filtered_texts = [filtered_texts[i] for i in sorted_indices] + filtered_scores = [filtered_scores[i] for i in sorted_indices] + + # Create a combined bounding box that encompasses all individual boxes + if filtered_bboxes: + # Find min and max coordinates across all bounding boxes + all_x = [point[0] for bbox in filtered_bboxes for point in bbox] + all_y = [point[1] for bbox in filtered_bboxes for point in bbox] + + min_x, max_x = min(all_x), max(all_x) + min_y, max_y = min(all_y), max(all_y) + + # Create a new bounding box with the min/max coordinates + combined_bbox = [ + [min_x, min_y], # top-left + [max_x, min_y], # top-right + [max_x, max_y], # bottom-right + [min_x, max_y] # bottom-left + ] + + # Calculate average score + avg_score = sum(filtered_scores) / len(filtered_scores) if filtered_scores else 0 + + # Add the combined bounding box to the results + filtered_bboxes.insert(0, combined_bbox) + filtered_texts.insert(0, ref_text) # Use the reference text for the combined box + filtered_scores.insert(0, avg_score) + + # Update the output with filtered results + output[ref_text]['bboxes'] = filtered_bboxes + output[ref_text]['texts'] = filtered_texts + output[ref_text]['scores'] = filtered_scores + + return output async def aapply( @@ -394,30 +537,15 @@ async def aapply( filtered_ocr_result = { 'image_width': ocr_results[i]['image_width'], 'image_height': ocr_results[i]['image_height'], - 'ocr_data': self._filter_ocr_results(ocr_results[i]['ocr_data'], extracted_result) + 'ocr_data': self._filter_ocr_results_v2(ocr_results[i]['ocr_data'], extracted_result) } filtered_ocr_results.append(filtered_ocr_result) print(f'Filtered OCR results: {filtered_ocr_results}') # convert filtered OCR results to Label Studio format - bbox_annotations, text_annotations = self._convert_ocr_results_to_label_studio_format(filtered_ocr_results) + # bbox_annotations, text_annotations = self._convert_ocr_results_to_label_studio_format(filtered_ocr_results) + bbox_annotations, text_annotations = self._convert_ocr_results_to_label_studio_format_v2(filtered_ocr_results) print(f'Bbox annotations: {bbox_annotations}') print(f'Text annotations: {text_annotations}') output['bbox'] = bbox_annotations output['transcription'] = text_annotations return output - - -if __name__ == "__main__": - images = [ - "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0000.png", - # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0001.png", - # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0002.png", - # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0003.png", - # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0004.png", - # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0005.png", - # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0006.png", - # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0007.png", - # "https://htx-pub.s3.amazonaws.com/demo/ocr/pdf/output_0008.png" - ] - results = asyncio.run(LabelStudioSkillImageOCR.process_images_with_ocr(images)) - print(results) diff --git a/poetry.lock b/poetry.lock index ef7d66bb..e9434af6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -123,7 +123,7 @@ multidict = ">=4.5,<7.0" yarl = ">=1.12.0,<2.0" [package.extras] -speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] [[package]] name = "aiokafka" @@ -206,8 +206,8 @@ files = [ ] [package.extras] -dev = ["aiounittest (==1.4.1) ; python_version < \"3.8\"", "attribution (==1.6.2)", "black (==23.3.0)", "coverage[toml] (==7.2.3)", "flake8 (==5.0.4)", "flake8-bugbear (==23.3.12)", "flit (==3.7.1)", "mypy (==1.2.0)", "ufmt (==2.1.0)", "usort (==1.0.6)"] -docs = ["sphinx (==6.1.3) ; python_version >= \"3.8\"", "sphinx-mdinclude (==0.5.3)"] +dev = ["aiounittest (==1.4.1)", "attribution (==1.6.2)", "black (==23.3.0)", "coverage[toml] (==7.2.3)", "flake8 (==5.0.4)", "flake8-bugbear (==23.3.12)", "flit (==3.7.1)", "mypy (==1.2.0)", "ufmt (==2.1.0)", "usort (==1.0.6)"] +docs = ["sphinx (==6.1.3)", "sphinx-mdinclude (==0.5.3)"] [[package]] name = "amqp" @@ -256,7 +256,7 @@ typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} [package.extras] doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1)"] trio = ["trio (>=0.26.1)"] [[package]] @@ -412,8 +412,8 @@ files = [ six = ">=1.12.0" [package.extras] -astroid = ["astroid (>=1,<2) ; python_version < \"3\"", "astroid (>=2,<4) ; python_version >= \"3\""] -test = ["astroid (>=1,<2) ; python_version < \"3\"", "astroid (>=2,<4) ; python_version >= \"3\"", "pytest"] +astroid = ["astroid (>=1,<2)", "astroid (>=2,<4)"] +test = ["astroid (>=1,<2)", "astroid (>=2,<4)", "pytest"] [[package]] name = "async-lru" @@ -455,12 +455,12 @@ files = [ ] [package.extras] -benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] -cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] -dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\""] +tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "babel" @@ -607,7 +607,7 @@ typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"pypy\"", "aiohttp (>=3.7.4,!=3.9.0) ; sys_platform == \"win32\" and implementation_name == \"pypy\""] +d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -694,7 +694,7 @@ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} [package.extras] docs = ["furo (>=2023.08.17)", "sphinx (>=7.0,<8.0)", "sphinx-argparse-cli (>=1.5)", "sphinx-autodoc-typehints (>=1.10)", "sphinx-issues (>=3.0.0)"] -test = ["build[uv,virtualenv]", "filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0) ; python_version < \"3.10\"", "setuptools (>=56.0.0) ; python_version == \"3.10\"", "setuptools (>=56.0.0) ; python_version == \"3.11\"", "setuptools (>=67.8.0) ; python_version >= \"3.12\"", "wheel (>=0.36.0)"] +test = ["build[uv,virtualenv]", "filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0)", "setuptools (>=56.0.0)", "setuptools (>=56.0.0)", "setuptools (>=67.8.0)", "wheel (>=0.36.0)"] typing = ["build[uv]", "importlib-metadata (>=5.1)", "mypy (>=1.9.0,<1.10.0)", "tomli", "typing-extensions (>=3.7.4.3)"] uv = ["uv (>=0.1.18)"] virtualenv = ["virtualenv (>=20.0.35)"] @@ -740,32 +740,32 @@ vine = ">=5.1.0,<6.0" arangodb = ["pyArango (>=2.0.2)"] auth = ["cryptography (==42.0.5)"] azureblockblob = ["azure-storage-blob (>=12.15.0)"] -brotli = ["brotli (>=1.0.0) ; platform_python_implementation == \"CPython\"", "brotlipy (>=0.7.0) ; platform_python_implementation == \"PyPy\""] +brotli = ["brotli (>=1.0.0)", "brotlipy (>=0.7.0)"] cassandra = ["cassandra-driver (>=3.25.0,<4)"] consul = ["python-consul2 (==0.1.5)"] cosmosdbsql = ["pydocumentdb (==2.3.5)"] -couchbase = ["couchbase (>=3.0.0) ; platform_python_implementation != \"PyPy\" and (platform_system != \"Windows\" or python_version < \"3.10\")"] +couchbase = ["couchbase (>=3.0.0)"] couchdb = ["pycouchdb (==1.14.2)"] django = ["Django (>=2.2.28)"] dynamodb = ["boto3 (>=1.26.143)"] elasticsearch = ["elastic-transport (<=8.13.0)", "elasticsearch (<=8.13.0)"] -eventlet = ["eventlet (>=0.32.0) ; python_version < \"3.10\""] +eventlet = ["eventlet (>=0.32.0)"] gcs = ["google-cloud-storage (>=2.10.0)"] gevent = ["gevent (>=1.5.0)"] -librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] -memcache = ["pylibmc (==1.6.3) ; platform_system != \"Windows\""] +librabbitmq = ["librabbitmq (>=2.0.0)"] +memcache = ["pylibmc (==1.6.3)"] mongodb = ["pymongo[srv] (>=4.0.2)"] msgpack = ["msgpack (==1.0.8)"] pymemcache = ["python-memcached (>=1.61)"] -pyro = ["pyro4 (==4.82) ; python_version < \"3.11\""] +pyro = ["pyro4 (==4.82)"] pytest = ["pytest-celery[all] (>=1.0.0)"] redis = ["redis (>=4.5.2,!=4.5.5,<6.0.0)"] s3 = ["boto3 (>=1.26.143)"] slmq = ["softlayer-messaging (>=1.0.3)"] -solar = ["ephem (==4.1.5) ; platform_python_implementation != \"PyPy\""] +solar = ["ephem (==4.1.5)"] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] -sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.4)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"] -tblib = ["tblib (>=1.3.0) ; python_version < \"3.8.0\"", "tblib (>=1.5.0) ; python_version >= \"3.8.0\""] +sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.4)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] +tblib = ["tblib (>=1.3.0)", "tblib (>=1.5.0)"] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=1.3.1)"] zstd = ["zstandard (==0.22.0)"] @@ -1208,10 +1208,10 @@ files = [ ] [package.extras] -avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "requests"] -dev = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "flake8", "pytest (==4.6.4) ; python_version < \"3.0\"", "pytest ; python_version >= \"3.0\"", "pytest-timeout", "requests"] -doc = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "requests", "sphinx", "sphinx-rtd-theme"] -json = ["jsonschema", "pyrsistent (==0.16.1) ; python_version < \"3.0\"", "pyrsistent ; python_version > \"3.0\"", "requests"] +avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests"] +dev = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "flake8", "pytest", "pytest (==4.6.4)", "pytest-timeout", "requests"] +doc = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests", "sphinx", "sphinx-rtd-theme"] +json = ["jsonschema", "pyrsistent", "pyrsistent (==0.16.1)", "requests"] protobuf = ["protobuf", "requests"] schema-registry = ["requests"] @@ -1386,7 +1386,7 @@ files = [ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli ; python_full_version <= \"3.11.0a6\""] +toml = ["tomli"] [[package]] name = "cryptography" @@ -1476,8 +1476,8 @@ jinja2 = ">=2.10.1,<4.0" packaging = "*" pydantic = [ {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""}, - {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version == \"3.10\""}, - {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"4.0\""}, + {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.10.0,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.12\" and python_version < \"4.0\""}, ] pyyaml = ">=6.0.1" @@ -1522,7 +1522,7 @@ xxhash = "*" apache-beam = ["apache-beam (>=2.26.0)"] audio = ["librosa", "soundfile (>=0.12.1)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] @@ -1530,7 +1530,7 @@ quality = ["ruff (>=0.3.0)"] s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=6.2.1)"] @@ -1738,7 +1738,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version <= \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -1760,7 +1760,7 @@ files = [ ] [package.extras] -tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""] +tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] [[package]] name = "faker" @@ -1853,7 +1853,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] -typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] +typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "flatbuffers" @@ -1945,18 +1945,18 @@ files = [ ] [package.extras] -all = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\"", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0) ; python_version <= \"3.12\"", "xattr ; sys_platform == \"darwin\"", "zopfli (>=0.1.4)"] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] graphite = ["lz4 (>=1.7.4.2)"] -interpolatable = ["munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\""] +interpolatable = ["munkres", "pycairo", "scipy"] lxml = ["lxml (>=4.0)"] pathops = ["skia-pathops (>=0.5.0)"] plot = ["matplotlib"] repacker = ["uharfbuzz (>=0.23.0)"] symfont = ["sympy"] -type1 = ["xattr ; sys_platform == \"darwin\""] +type1 = ["xattr"] ufo = ["fs (>=2.2.0,<3)"] -unicode = ["unicodedata2 (>=15.1.0) ; python_version <= \"3.12\""] -woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] [[package]] name = "fqdn" @@ -2531,7 +2531,7 @@ idna = "*" sniffio = "*" [package.extras] -brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +brotli = ["brotli", "brotlicffi"] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -2740,7 +2740,7 @@ zipp = ">=0.5" [package.extras] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -test = ["flufl.flake8", "importlib-resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] [[package]] name = "importlib-resources" @@ -2759,7 +2759,7 @@ markers = {dev = "python_version < \"3.10\""} zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] @@ -2780,7 +2780,7 @@ files = [ [package.extras] docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"] -testing = ["pygments", "pytest (>=6)", "pytest-black (>=0.3.7) ; platform_python_implementation != \"PyPy\"", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1) ; platform_python_implementation != \"PyPy\""] +testing = ["pygments", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] [[package]] name = "iniconfig" @@ -3218,7 +3218,7 @@ traitlets = ">=5.3" [package.extras] docs = ["ipykernel", "myst-parser", "pydata-sphinx-theme", "sphinx (>=4)", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko ; sys_platform == \"win32\"", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] +test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] [[package]] name = "jupyter-console" @@ -3648,7 +3648,7 @@ azureservicebus = ["azure-servicebus (>=7.10.0)"] azurestoragequeues = ["azure-identity (>=1.12.0)", "azure-storage-queue (>=12.6.0)"] confluentkafka = ["confluent-kafka (>=2.2.0)"] consul = ["python-consul2 (==0.1.5)"] -librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] +librabbitmq = ["librabbitmq (>=2.0.0)"] mongodb = ["pymongo (>=4.1.1)"] msgpack = ["msgpack (==1.1.0)"] pyro = ["pyro4 (==4.82)"] @@ -3656,7 +3656,7 @@ qpid = ["qpid-python (>=0.26)", "qpid-tools (>=0.26)"] redis = ["redis (>=4.5.2,!=4.5.5,!=5.0.2)"] slmq = ["softlayer-messaging (>=1.0.3)"] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] -sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"] +sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=2.8.0)"] @@ -4191,7 +4191,7 @@ watchdog = ">=2.0" [package.extras] i18n = ["babel (>=2.9.0)"] -min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4) ; platform_system == \"Windows\"", "ghp-import (==1.0)", "importlib-metadata (==4.4) ; python_version < \"3.10\"", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] [[package]] name = "mkdocs-autorefs" @@ -4508,7 +4508,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] +gmpy = ["gmpy2 (>=2.1.0a4)"] tests = ["pytest (>=4.6)"] [[package]] @@ -4529,7 +4529,7 @@ PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.14,<0.18) ; python_version >= \"3.6\" and platform_system == \"Windows\"", "pymsalruntime (>=0.17,<0.18) ; python_version >= \"3.8\" and platform_system == \"Darwin\""] +broker = ["pymsalruntime (>=0.14,<0.18)", "pymsalruntime (>=0.17,<0.18)"] [[package]] name = "multidict" @@ -4817,7 +4817,7 @@ tornado = ">=6.2.0" [package.extras] dev = ["hatch", "pre-commit"] docs = ["myst-parser", "nbsphinx", "pydata-sphinx-theme", "sphinx (>=1.3.6)", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["importlib-resources (>=5.0) ; python_version < \"3.10\"", "ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[test] (>=2.27.1,<3)", "nbval", "pytest (>=7.0)", "pytest-console-scripts", "pytest-timeout", "pytest-tornasync", "requests"] +test = ["importlib-resources (>=5.0)", "ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[test] (>=2.27.1,<3)", "nbval", "pytest (>=7.0)", "pytest-console-scripts", "pytest-timeout", "pytest-tornasync", "requests"] [[package]] name = "notebook-shim" @@ -5524,7 +5524,7 @@ docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions ; python_version < \"3.10\""] +typing = ["typing-extensions"] xmp = ["defusedxml"] [[package]] @@ -5663,7 +5663,7 @@ files = [ ] [package.extras] -test = ["enum34 ; python_version <= \"3.4\"", "ipaddress ; python_version < \"3.0\"", "mock ; python_version < \"3.0\"", "pywin32 ; sys_platform == \"win32\"", "wmi ; sys_platform == \"win32\""] +test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] [[package]] name = "ptyprocess" @@ -5817,7 +5817,7 @@ typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata ; python_version >= \"3.9\" and sys_platform == \"win32\""] +timezone = ["tzdata"] [[package]] name = "pydantic-core" @@ -6524,6 +6524,113 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} +[[package]] +name = "rapidfuzz" +version = "3.12.2" +description = "rapid fuzzy string matching" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "rapidfuzz-3.12.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0b9a75e0385a861178adf59e86d6616cbd0d5adca7228dc9eeabf6f62cf5b0b1"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6906a7eb458731e3dd2495af1d0410e23a21a2a2b7ced535e6d5cd15cb69afc5"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4b3334a8958b689f292d5ce8a928140ac98919b51e084f04bf0c14276e4c6ba"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:85a54ce30345cff2c79cbcffa063f270ad1daedd0d0c3ff6e541d3c3ba4288cf"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:acb63c5072c08058f8995404201a52fc4e1ecac105548a4d03c6c6934bda45a3"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5385398d390c6571f0f2a7837e6ddde0c8b912dac096dc8c87208ce9aaaa7570"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5032cbffa245b4beba0067f8ed17392ef2501b346ae3c1f1d14b950edf4b6115"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:195adbb384d89d6c55e2fd71e7fb262010f3196e459aa2f3f45f31dd7185fe72"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f43b773a4d4950606fb25568ecde5f25280daf8f97b87eb323e16ecd8177b328"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:55a43be0e0fa956a919043c19d19bd988991d15c59f179d413fe5145ed9deb43"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:71cf1ea16acdebe9e2fb62ee7a77f8f70e877bebcbb33b34e660af2eb6d341d9"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a3692d4ab36d44685f61326dca539975a4eda49b2a76f0a3df177d8a2c0de9d2"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-win32.whl", hash = "sha256:09227bd402caa4397ba1d6e239deea635703b042dd266a4092548661fb22b9c6"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-win_amd64.whl", hash = "sha256:0f05b7b95f9f87254b53fa92048367a8232c26cee7fc8665e4337268c3919def"}, + {file = "rapidfuzz-3.12.2-cp310-cp310-win_arm64.whl", hash = "sha256:6938738e00d9eb6e04097b3f565097e20b0c398f9c58959a2bc64f7f6be3d9da"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9c4d984621ae17404c58f8d06ed8b025e167e52c0e6a511dfec83c37e9220cd"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f9132c55d330f0a1d34ce6730a76805323a6250d97468a1ca766a883d6a9a25"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b343b6cb4b2c3dbc8d2d4c5ee915b6088e3b144ddf8305a57eaab16cf9fc74"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24081077b571ec4ee6d5d7ea0e49bc6830bf05b50c1005028523b9cd356209f3"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c988a4fc91856260355773bf9d32bebab2083d4c6df33fafeddf4330e5ae9139"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:780b4469ee21cf62b1b2e8ada042941fd2525e45d5fb6a6901a9798a0e41153c"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:edd84b0a323885493c893bad16098c5e3b3005d7caa995ae653da07373665d97"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efa22059c765b3d8778083805b199deaaf643db070f65426f87d274565ddf36a"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:095776b11bb45daf7c2973dd61cc472d7ea7f2eecfa454aef940b4675659b92f"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7e2574cf4aa86065600b664a1ac7b8b8499107d102ecde836aaaa403fc4f1784"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d5a3425a6c50fd8fbd991d8f085ddb504791dae6ef9cc3ab299fea2cb5374bef"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:97fb05e1ddb7b71a054040af588b0634214ee87cea87900d309fafc16fd272a4"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-win32.whl", hash = "sha256:b4c5a0413589aef936892fbfa94b7ff6f7dd09edf19b5a7b83896cc9d4e8c184"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-win_amd64.whl", hash = "sha256:58d9ae5cf9246d102db2a2558b67fe7e73c533e5d769099747921232d88b9be2"}, + {file = "rapidfuzz-3.12.2-cp311-cp311-win_arm64.whl", hash = "sha256:7635fe34246cd241c8e35eb83084e978b01b83d5ef7e5bf72a704c637f270017"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1d982a651253ffe8434d9934ff0c1089111d60502228464721a2a4587435e159"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:02e6466caa0222d5233b1f05640873671cd99549a5c5ba4c29151634a1e56080"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e956b3f053e474abae69ac693a52742109d860ac2375fe88e9387d3277f4c96c"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dee7d740a2d5418d4f964f39ab8d89923e6b945850db833e798a1969b19542a"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a057cdb0401e42c84b6516c9b1635f7aedd5e430c6e388bd5f6bcd1d6a0686bb"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dccf8d4fb5b86d39c581a59463c596b1d09df976da26ff04ae219604223d502f"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21d5b3793c6f5aecca595cd24164bf9d3c559e315ec684f912146fc4e769e367"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:46a616c0e13cff2de1761b011e0b14bb73b110182f009223f1453d505c9a975c"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:19fa5bc4301a1ee55400d4a38a8ecf9522b0391fc31e6da5f4d68513fe5c0026"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:544a47190a0d25971658a9365dba7095397b4ce3e897f7dd0a77ca2cf6fa984e"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:f21af27c5e001f0ba1b88c36a0936437dfe034c452548d998891c21125eb640f"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b63170d9db00629b5b3f2862114d8d6ee19127eaba0eee43762d62a25817dbe0"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-win32.whl", hash = "sha256:6c7152d77b2eb6bfac7baa11f2a9c45fd5a2d848dbb310acd0953b3b789d95c9"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-win_amd64.whl", hash = "sha256:1a314d170ee272ac87579f25a6cf8d16a031e1f7a7b07663434b41a1473bc501"}, + {file = "rapidfuzz-3.12.2-cp312-cp312-win_arm64.whl", hash = "sha256:d41e8231326e94fd07c4d8f424f6bed08fead6f5e6688d1e6e787f1443ae7631"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:941f31038dba5d3dedcfcceba81d61570ad457c873a24ceb13f4f44fcb574260"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fe2dfc454ee51ba168a67b1e92b72aad251e45a074972cef13340bbad2fd9438"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78fafaf7f5a48ee35ccd7928339080a0136e27cf97396de45259eca1d331b714"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0c7989ff32c077bb8fd53253fd6ca569d1bfebc80b17557e60750e6909ba4fe"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96fa00bc105caa34b6cd93dca14a29243a3a7f0c336e4dcd36348d38511e15ac"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bccfb30c668620c5bc3490f2dc7d7da1cca0ead5a9da8b755e2e02e2ef0dff14"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f9b0adc3d894beb51f5022f64717b6114a6fabaca83d77e93ac7675911c8cc5"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:32691aa59577f42864d5535cb6225d0f47e2c7bff59cf4556e5171e96af68cc1"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:758b10380ad34c1f51753a070d7bb278001b5e6fcf544121c6df93170952d705"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:50a9c54c0147b468363119132d514c5024fbad1ed8af12bd8bd411b0119f9208"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e3ceb87c11d2d0fbe8559bb795b0c0604b84cfc8bb7b8720b5c16e9e31e00f41"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f7c9a003002434889255ff5676ca0f8934a478065ab5e702f75dc42639505bba"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-win32.whl", hash = "sha256:cf165a76870cd875567941cf861dfd361a0a6e6a56b936c5d30042ddc9def090"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-win_amd64.whl", hash = "sha256:55bcc003541f5f16ec0a73bf6de758161973f9e8d75161954380738dd147f9f2"}, + {file = "rapidfuzz-3.12.2-cp313-cp313-win_arm64.whl", hash = "sha256:69f6ecdf1452139f2b947d0c169a605de578efdb72cbb2373cb0a94edca1fd34"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4c852cd8bed1516a64fd6e2d4c6f270d4356196ee03fda2af1e5a9e13c34643"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42e7f747b55529a6d0d1588695d71025e884ab48664dca54b840413dea4588d8"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a749fd2690f24ef256b264a781487746bbb95344364fe8fe356f0eef7ef206ba"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a11e1d036170bbafa43a9e63d8c309273564ec5bdfc5439062f439d1a16965a"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dfb337f1832c1231e3d5621bd0ebebb854e46036aedae3e6a49c1fc08f16f249"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e88c6e68fca301722fa3ab7fd3ca46998012c14ada577bc1e2c2fc04f2067ca6"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17e1a3a8b4b5125cfb63a6990459b25b87ea769bdaf90d05bb143f8febef076a"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b9f8177b24ccc0a843e85932b1088c5e467a7dd7a181c13f84c684b796bea815"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6c506bdc2f304051592c0d3b0e82eed309248ec10cdf802f13220251358375ea"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:30bf15c1ecec2798b713d551df17f23401a3e3653ad9ed4e83ad1c2b06e86100"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:bd9a67cfc83e8453ef17ddd1c2c4ce4a74d448a197764efb54c29f29fb41f611"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7a6eaec2ef658dd650c6eb9b36dff7a361ebd7d8bea990ce9d639b911673b2cb"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-win32.whl", hash = "sha256:d7701769f110332cde45c41759cb2a497de8d2dca55e4c519a46aed5fbb19d1a"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-win_amd64.whl", hash = "sha256:296bf0fd4f678488670e262c87a3e4f91900b942d73ae38caa42a417e53643b1"}, + {file = "rapidfuzz-3.12.2-cp39-cp39-win_arm64.whl", hash = "sha256:7957f5d768de14f6b2715303ccdf224b78416738ee95a028a2965c95f73afbfb"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e5fd3ce849b27d063755829cda27a9dab6dbd63be3801f2a40c60ec563a4c90f"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:54e53662d71ed660c83c5109127c8e30b9e607884b7c45d2aff7929bbbd00589"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b9e43cf2213e524f3309d329f1ad8dbf658db004ed44f6ae1cd2919aa997da5"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29ca445e320e5a8df3bd1d75b4fa4ecfa7c681942b9ac65b55168070a1a1960e"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83eb7ef732c2f8533c6b5fbe69858a722c218acc3e1fc190ab6924a8af7e7e0e"}, + {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:648adc2dd2cf873efc23befcc6e75754e204a409dfa77efd0fea30d08f22ef9d"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9b1e6f48e1ffa0749261ee23a1c6462bdd0be5eac83093f4711de17a42ae78ad"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:1ae9ded463f2ca4ba1eb762913c5f14c23d2e120739a62b7f4cc102eab32dc90"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dda45f47b559be72ecbce45c7f71dc7c97b9772630ab0f3286d97d2c3025ab71"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3745c6443890265513a3c8777f2de4cb897aeb906a406f97741019be8ad5bcc"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36d3ef4f047ed1bc96fa29289f9e67a637ddca5e4f4d3dc7cb7f50eb33ec1664"}, + {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:54bb69ebe5ca0bd7527357e348f16a4c0c52fe0c2fcc8a041010467dcb8385f7"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3f2ddd5b99b254039a8c82be5749d4d75943f62eb2c2918acf6ffd586852834f"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:8117dab9b26a1aaffab59b4e30f80ac4d55e61ad4139a637c149365960933bee"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40c0f16d62d6553527de3dab2fb69709c4383430ea44bce8fb4711ed4cbc6ae3"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f177e1eb6e4f5261a89c475e21bce7a99064a8f217d2336fb897408f46f0ceaf"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5df0cecc2852fcb078ed1b4482fac4fc2c2e7787f3edda8920d9a4c0f51b1c95"}, + {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3b3c4df0321df6f8f0b61afbaa2ced9622750ee1e619128db57a18533d139820"}, + {file = "rapidfuzz-3.12.2.tar.gz", hash = "sha256:b0ba1ccc22fff782e7152a3d3d0caca44ec4e32dc48ba01c560b8593965b5aa3"}, +] + +[package.extras] +all = ["numpy"] + [[package]] name = "redis" version = "5.0.8" @@ -6992,9 +7099,9 @@ files = [ ] [package.extras] -nativelib = ["pyobjc-framework-Cocoa ; sys_platform == \"darwin\"", "pywin32 ; sys_platform == \"win32\""] -objc = ["pyobjc-framework-Cocoa ; sys_platform == \"darwin\""] -win32 = ["pywin32 ; sys_platform == \"win32\""] +nativelib = ["pyobjc-framework-Cocoa", "pywin32"] +objc = ["pyobjc-framework-Cocoa"] +win32 = ["pywin32"] [[package]] name = "setuptools" @@ -7009,9 +7116,9 @@ files = [ ] [package.extras] -core = ["importlib-metadata (>=6) ; python_version < \"3.10\"", "importlib-resources (>=5.10.2) ; python_version < \"3.9\"", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-ruff (<0.4) ; platform_system == \"Windows\"", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "pytest-ruff (>=0.3.2) ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "shellingham" @@ -7195,6 +7302,21 @@ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["pre-commit", "pytest (>=7.0)", "pytest-timeout"] typing = ["mypy (>=1.6,<2.0)", "traitlets (>=5.11.1)"] +[[package]] +name = "thefuzz" +version = "0.22.1" +description = "Fuzzy string matching in python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "thefuzz-0.22.1-py3-none-any.whl", hash = "sha256:59729b33556850b90e1093c4cf9e618af6f2e4c985df193fdf3c5b5cf02ca481"}, + {file = "thefuzz-0.22.1.tar.gz", hash = "sha256:7138039a7ecf540da323792d8592ef9902b1d79eb78c147d4f20664de79f3680"}, +] + +[package.dependencies] +rapidfuzz = ">=3.0.0,<4.0.0" + [[package]] name = "tiktoken" version = "0.7.0" @@ -7396,7 +7518,7 @@ files = [ {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, ] -markers = {main = "python_version <= \"3.10\""} +markers = {main = "python_version < \"3.11\""} [[package]] name = "tomli" @@ -7405,7 +7527,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version <= \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, @@ -7698,8 +7820,8 @@ files = [ ] [package.extras] -brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] @@ -7722,12 +7844,12 @@ httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standar python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} [package.extras] -standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] [[package]] name = "uvloop" @@ -7736,7 +7858,7 @@ description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.8.0" groups = ["main"] -markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"" +markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\"" files = [ {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9ebafa0b96c62881d5cafa02d9da2e44c23f9f0cd829f3a32a6aff771449c996"}, {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:35968fc697b0527a06e134999eef859b4034b37aebca537daeb598b9d45a137b"}, @@ -7773,7 +7895,7 @@ files = [ [package.extras] docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0) ; python_version >= \"3.12\"", "aiohttp (>=3.8.1) ; python_version < \"3.12\"", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] +test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] [[package]] name = "vcrpy" @@ -7783,7 +7905,6 @@ optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "vcrpy-6.0.1-py2.py3-none-any.whl", hash = "sha256:621c3fb2d6bd8aa9f87532c688e4575bcbbde0c0afeb5ebdb7e14cac409edfdd"}, {file = "vcrpy-6.0.1.tar.gz", hash = "sha256:9e023fee7f892baa0bbda2f7da7c8ac51165c1c6e38ff8688683a12a4bde9278"}, ] @@ -8457,14 +8578,14 @@ files = [ markers = {dev = "python_version < \"3.10\""} [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] -test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.9.7 || >3.9.7,<3.13" -content-hash = "5bb9e01fd3caa4e768a907809651e0477d9416971ef63988323bfdc8f0302325" +content-hash = "1fb88649c8e32945d93894586aaca712ec2dfe8276d5e23393f1940297816185" diff --git a/pyproject.toml b/pyproject.toml index 0e6b7653..0164ca8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ requests = "^2.32.0" litellm = {url = "https://github.com/HumanSignal/litellm/archive/c0506d5844ef20d0db14144fbcbf99c05637bde3.zip"} pandarallel = "^1.6.5" instructor = "^1.4.3" +thefuzz = "0.22.1" [tool.poetry.group.dev.dependencies] pytest = "^7.4.3" From e235c64dd61b63d5c0e658cc5e3018aab161daf9 Mon Sep 17 00:00:00 2001 From: nik Date: Fri, 14 Mar 2025 14:52:57 +0000 Subject: [PATCH 06/10] WIP: working stuff with Labels --- .../collection/label_studio_image_ocr.py | 228 ++++++++++++++---- 1 file changed, 178 insertions(+), 50 deletions(-) diff --git a/adala/skills/collection/label_studio_image_ocr.py b/adala/skills/collection/label_studio_image_ocr.py index fd41c7cf..1bde9aa3 100644 --- a/adala/skills/collection/label_studio_image_ocr.py +++ b/adala/skills/collection/label_studio_image_ocr.py @@ -18,7 +18,7 @@ from adala.runtimes._litellm import MessageChunkType from pydantic import BaseModel, Field, model_validator, computed_field from difflib import SequenceMatcher - +import numpy as np from adala.runtimes import Runtime, AsyncRuntime from adala.utils.internal_data import InternalDataFrame @@ -28,6 +28,7 @@ from label_studio_sdk._extensions.label_studio_tools.core.utils.json_schema import ( json_schema_to_pydantic, ) +from .match_bbox_by_text import find_text_in_image logger = logging.getLogger(__name__) @@ -490,6 +491,12 @@ def _filter_ocr_results_v2(self, ocr_results: Dict, reference_texts: List[str]) output[ref_text]['scores'] = filtered_scores return output + + + def _get_labels(self) -> List[str]: + # TODO: validate labels are coming from tag, use control tag name + # format: {'StartDate': LabelTag(attr={'value': 'StartDate', 'background': 'red'}, tag='Label', value='StartDate', parent_name='columns'), 'EndDate': LabelTag(attr={'value': 'EndDate', 'background': 'green'}, tag='Label', value='EndDate', parent_name='columns'), 'Amount': LabelTag(attr={'value': 'Amount'}, tag='Label', value='Amount', parent_name='columns')} + return list(self.label_interface.labels)[0] async def aapply( @@ -497,55 +504,176 @@ async def aapply( input: InternalDataFrame, runtime: AsyncRuntime, ) -> InternalDataFrame: - - with json_schema_to_pydantic(self.field_schema) as ResponseModel: - # special handling to flag image inputs if they exist - input_field_types = defaultdict(lambda: MessageChunkType.TEXT) - image_value_key = None - for tag in self.image_tags: - # these are the project variable names, NOT the label config tag names. TODO: pass this info from LSE to avoid recomputing it here. - variables = extract_variable_name(tag.value) - if len(variables) != 1: - logger.warning( - f"Image tag {tag.name} has multiple variables: {variables}. Cannot mark these variables as image inputs." - ) - continue - image_value_key = variables[0] - input_field_types[image_value_key] = ( - MessageChunkType.IMAGE_URLS - if tag.attr.get("valueList") - else MessageChunkType.IMAGE_URL + + labels = self._get_labels() + # validate labels + from adala.utils.pydantic_generator import field_schema_to_pydantic_class + LineItem = field_schema_to_pydantic_class( + class_name="LineItem", + description="A single line extracted from the document", + field_schema={label: {"type": "string"} for label in labels} + ) + + class ResponseModel(BaseModel): + lines: List[LineItem] + + input_field_types = defaultdict(lambda: MessageChunkType.TEXT) + image_value_key = None + for tag in self.image_tags: + # these are the project variable names, NOT the label config tag names. TODO: pass this info from LSE to avoid recomputing it here. + variables = extract_variable_name(tag.value) + if len(variables) != 1: + logger.warning( + f"Image tag {tag.name} has multiple variables: {variables}. Cannot mark these variables as image inputs." ) - - logger.debug( - f"Using VisionRuntime with input field types: {input_field_types}" - ) - output = await runtime.batch_to_batch( - input, - input_template=self.input_template, - output_template="", - instructions_template=self.instructions, - response_model=ResponseModel, - input_field_types=input_field_types, + continue + image_value_key = variables[0] + input_field_types[image_value_key] = ( + MessageChunkType.IMAGE_URLS + if tag.attr.get("valueList") + else MessageChunkType.IMAGE_URL ) - print(f'Output: {output}') - print(f'Process images with OCR: {input[image_value_key].tolist()}') - ocr_results = await self.process_images_with_ocr(input[image_value_key].tolist()) - filtered_ocr_results = [] - for i, row in output.iterrows(): - extracted_result = row['output'] - filtered_ocr_result = { - 'image_width': ocr_results[i]['image_width'], - 'image_height': ocr_results[i]['image_height'], - 'ocr_data': self._filter_ocr_results_v2(ocr_results[i]['ocr_data'], extracted_result) + + logger.debug( + f"Using VisionRuntime with input field types: {input_field_types}" + ) + output = await runtime.batch_to_batch( + input, + input_template=self.input_template, + output_template="", + instructions_template=self.instructions, + response_model=ResponseModel, + input_field_types=input_field_types, + ) + print(f'Output: {output}') + + images = input[image_value_key].tolist() + all_bbox_annotations = [] + all_text_annotations = [] + all_label_annotations = [] + for i, row in output.iterrows(): + extracted_results = row['lines'] + + ocr_results = find_text_in_image(images[i], extracted_results) + bbox_annotations = [] + text_annotations = [] + label_annotations = [] + for ocr_result in ocr_results: + # Add bbox annotation + + bbox_id = ocr_result['element']['id'] + parent_id = ocr_result['element'].get('parent_id') + + bbox_annotation = ocr_result['element'] + bbox_annotation['score'] = ocr_result['matching_score'] * ocr_result['element']['score'] + + bbox_annotations.append(ocr_result['element']) + + # Add text annotation + text_annotation = { + 'text': [ocr_result['reference_text']], + 'id': bbox_id } - filtered_ocr_results.append(filtered_ocr_result) - print(f'Filtered OCR results: {filtered_ocr_results}') - # convert filtered OCR results to Label Studio format - # bbox_annotations, text_annotations = self._convert_ocr_results_to_label_studio_format(filtered_ocr_results) - bbox_annotations, text_annotations = self._convert_ocr_results_to_label_studio_format_v2(filtered_ocr_results) - print(f'Bbox annotations: {bbox_annotations}') - print(f'Text annotations: {text_annotations}') - output['bbox'] = bbox_annotations - output['transcription'] = text_annotations - return output + if parent_id: + text_annotation['parent_id'] = parent_id + text_annotations.append(text_annotation) + + label = ocr_result.pop('reference_label', None) + + if label: + label_annotation = { + 'labels': [label], + 'id': bbox_id + } + if parent_id: + label_annotation['parent_id'] = parent_id + label_annotations.append(label_annotation) + + all_bbox_annotations.append(bbox_annotations) + all_text_annotations.append(text_annotations) + all_label_annotations.append(label_annotations) + output['bbox'] = all_bbox_annotations + output['transcription'] = all_text_annotations + output['columns'] = all_label_annotations + return output + + + # with json_schema_to_pydantic(self.field_schema) as ResponseModel: + # # special handling to flag image inputs if they exist + # input_field_types = defaultdict(lambda: MessageChunkType.TEXT) + # image_value_key = None + # for tag in self.image_tags: + # # these are the project variable names, NOT the label config tag names. TODO: pass this info from LSE to avoid recomputing it here. + # variables = extract_variable_name(tag.value) + # if len(variables) != 1: + # logger.warning( + # f"Image tag {tag.name} has multiple variables: {variables}. Cannot mark these variables as image inputs." + # ) + # continue + # image_value_key = variables[0] + # input_field_types[image_value_key] = ( + # MessageChunkType.IMAGE_URLS + # if tag.attr.get("valueList") + # else MessageChunkType.IMAGE_URL + # ) + + # logger.debug( + # f"Using VisionRuntime with input field types: {input_field_types}" + # ) + # output = await runtime.batch_to_batch( + # input, + # input_template=self.input_template, + # output_template="", + # instructions_template=self.instructions, + # response_model=ResponseModel, + # input_field_types=input_field_types, + # ) + # print(f'Output: {output}') + # print(f'Process images with OCR: {input[image_value_key].tolist()}') + # # ocr_results = await self.process_images_with_ocr(input[image_value_key].tolist()) + # # filtered_ocr_results = [] + # images = input[image_value_key].tolist() + # all_bbox_annotations = [] + # all_text_annotations = [] + # for i, row in output.iterrows(): + # extracted_result = row['output'] + + # ocr_results = find_text_in_image(images[i], extracted_result) + # bbox_annotations = [] + # text_annotations = [] + # for ocr_result in ocr_results: + # # Convert OCR results to Label Studio format + # parent_id = ocr_result['bbox']['id'] + + # for word in ocr_result['words']: + # bbox_annotation = word['bbox'] + # bbox_annotation['rotation'] = 0 + # bbox_annotation['parent_id'] = parent_id + # bbox_annotation['score'] = word['score'] + + # text_annotation = { + # 'text': [word['text']], + # 'id': bbox_annotation['id'], + # 'parent_id': parent_id + # } + + # bbox_annotations.append(bbox_annotation) + # text_annotations.append(text_annotation) + + # bbox_annotation = ocr_result['bbox'] + # bbox_annotation['rotation'] = 0 + # bbox_annotation['score'] = float(np.sqrt(ocr_result['detection_score'] * ocr_result['matching_score'])) + + # text_annotation = { + # 'text': [ocr_result['reference_text']], + # 'id': parent_id + # } + + # bbox_annotations.append(bbox_annotation) + # text_annotations.append(text_annotation) + + # all_bbox_annotations.append(bbox_annotations) + # all_text_annotations.append(text_annotations) + # output['bbox'] = all_bbox_annotations + # output['transcription'] = all_text_annotations + # return output From 08f7cd277a01ff432a715f87a0a9b58ddbd8e536 Mon Sep 17 00:00:00 2001 From: niklub Date: Sat, 15 Mar 2025 13:08:21 +0000 Subject: [PATCH 07/10] Sync Follow Merge dependencies Workflow run: https://github.com/HumanSignal/Adala/actions/runs/13873160528 --- poetry.lock | 7 ++++--- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 384fda43..8427a586 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3696,7 +3696,7 @@ optional = false python-versions = ">=3.9,<4" groups = ["main"] files = [ - {file = "5cc1d11e0afbaae6d597c1cbe12805b02af82aac.zip", hash = "sha256:99362c76b98c84e3d24aa6b194768e92c27788216c97663036dcf661c69eaf0b"}, + {file = "45a8a21065f0e2653857ebb52b8e10ef47856406.zip", hash = "sha256:372fd478fd0ddbd7fe004c04121fc8a8b45da1d03022dd3ef58096e7a13825ba"}, ] [package.dependencies] @@ -3722,7 +3722,7 @@ xmljson = "0.2.1" [package.source] type = "url" -url = "https://github.com/HumanSignal/label-studio-sdk/archive/5cc1d11e0afbaae6d597c1cbe12805b02af82aac.zip" +url = "https://github.com/HumanSignal/label-studio-sdk/archive/45a8a21065f0e2653857ebb52b8e10ef47856406.zip" [[package]] name = "litellm" @@ -7905,6 +7905,7 @@ optional = false python-versions = ">=3.8" groups = ["dev"] files = [ + {file = "vcrpy-6.0.1-py2.py3-none-any.whl", hash = "sha256:621c3fb2d6bd8aa9f87532c688e4575bcbbde0c0afeb5ebdb7e14cac409edfdd"}, {file = "vcrpy-6.0.1.tar.gz", hash = "sha256:9e023fee7f892baa0bbda2f7da7c8ac51165c1c6e38ff8688683a12a4bde9278"}, ] @@ -8588,4 +8589,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.9.7 || >3.9.7,<3.13" -content-hash = "1fb88649c8e32945d93894586aaca712ec2dfe8276d5e23393f1940297816185" +content-hash = "a7e509bc9fa5f39e65602b640489efb8e748502530428f1035ea5597632523c3" diff --git a/pyproject.toml b/pyproject.toml index 0164ca8a..c5430455 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ celery = {version = "^5.3.6", extras = ["redis"]} kombu = ">=5.4.0rc2" # Pin version to fix https://github.com/celery/celery/issues/8030. TODO: remove when this fix will be included in celery uvicorn = "*" pydantic-settings = "^2.2.1" -label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/5cc1d11e0afbaae6d597c1cbe12805b02af82aac.zip"} +label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/45a8a21065f0e2653857ebb52b8e10ef47856406.zip"} kafka-python-ng = "^2.2.3" requests = "^2.32.0" # Using litellm from forked repo until vertex fix is released: https://github.com/BerriAI/litellm/issues/7904 From ae1228fb63f57f309e8df5695bfac08b56fa9d70 Mon Sep 17 00:00:00 2001 From: niklub Date: Sat, 15 Mar 2025 13:10:48 +0000 Subject: [PATCH 08/10] Sync Follow Merge dependencies Workflow run: https://github.com/HumanSignal/Adala/actions/runs/13873181688 --- poetry.lock | 6 +++--- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8427a586..fcc39697 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3696,7 +3696,7 @@ optional = false python-versions = ">=3.9,<4" groups = ["main"] files = [ - {file = "45a8a21065f0e2653857ebb52b8e10ef47856406.zip", hash = "sha256:372fd478fd0ddbd7fe004c04121fc8a8b45da1d03022dd3ef58096e7a13825ba"}, + {file = "e3f6c7061c8798c91fce779ec7f4571e985e3810.zip", hash = "sha256:d89db23bfc023a38c8605fe3d661a2e21dee0522541d8ebca73be783ab9219a6"}, ] [package.dependencies] @@ -3722,7 +3722,7 @@ xmljson = "0.2.1" [package.source] type = "url" -url = "https://github.com/HumanSignal/label-studio-sdk/archive/45a8a21065f0e2653857ebb52b8e10ef47856406.zip" +url = "https://github.com/HumanSignal/label-studio-sdk/archive/e3f6c7061c8798c91fce779ec7f4571e985e3810.zip" [[package]] name = "litellm" @@ -8589,4 +8589,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.9.7 || >3.9.7,<3.13" -content-hash = "a7e509bc9fa5f39e65602b640489efb8e748502530428f1035ea5597632523c3" +content-hash = "a10c0d67b278861c3b9c8f6172f9aadff4c0c02f9d0d8fc1d3720b3a6f0c2104" diff --git a/pyproject.toml b/pyproject.toml index c5430455..68418573 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ celery = {version = "^5.3.6", extras = ["redis"]} kombu = ">=5.4.0rc2" # Pin version to fix https://github.com/celery/celery/issues/8030. TODO: remove when this fix will be included in celery uvicorn = "*" pydantic-settings = "^2.2.1" -label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/45a8a21065f0e2653857ebb52b8e10ef47856406.zip"} +label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/e3f6c7061c8798c91fce779ec7f4571e985e3810.zip"} kafka-python-ng = "^2.2.3" requests = "^2.32.0" # Using litellm from forked repo until vertex fix is released: https://github.com/BerriAI/litellm/issues/7904 From cdf6b14f47e8680a18565a88f9080c94e2eda08a Mon Sep 17 00:00:00 2001 From: nik Date: Mon, 17 Mar 2025 16:39:35 +0000 Subject: [PATCH 09/10] Update deps --- Dockerfile.app | 3 +- poetry.lock | 157 +++++++++++++++++++++++++++---------------------- pyproject.toml | 1 + 3 files changed, 89 insertions(+), 72 deletions(-) diff --git a/Dockerfile.app b/Dockerfile.app index 6d4c6433..231eb7e4 100644 --- a/Dockerfile.app +++ b/Dockerfile.app @@ -32,7 +32,8 @@ RUN --mount=type=cache,target="/var/cache/apt",sharing=locked \ set -eux; \ apt-get update; \ apt-get upgrade -y; \ - apt-get install --no-install-recommends -y procps; \ + apt-get install --no-install-recommends -y \ + procps tesseract-ocr tesseract-ocr-chi-sim tesseract-ocr-chi-tra tesseract-ocr-deu; \ apt-get autoremove -y RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \ diff --git a/poetry.lock b/poetry.lock index fcc39697..581e267f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -123,7 +123,7 @@ multidict = ">=4.5,<7.0" yarl = ">=1.12.0,<2.0" [package.extras] -speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] [[package]] name = "aiokafka" @@ -206,8 +206,8 @@ files = [ ] [package.extras] -dev = ["aiounittest (==1.4.1) ; python_version < \"3.8\"", "attribution (==1.6.2)", "black (==23.3.0)", "coverage[toml] (==7.2.3)", "flake8 (==5.0.4)", "flake8-bugbear (==23.3.12)", "flit (==3.7.1)", "mypy (==1.2.0)", "ufmt (==2.1.0)", "usort (==1.0.6)"] -docs = ["sphinx (==6.1.3) ; python_version >= \"3.8\"", "sphinx-mdinclude (==0.5.3)"] +dev = ["aiounittest (==1.4.1)", "attribution (==1.6.2)", "black (==23.3.0)", "coverage[toml] (==7.2.3)", "flake8 (==5.0.4)", "flake8-bugbear (==23.3.12)", "flit (==3.7.1)", "mypy (==1.2.0)", "ufmt (==2.1.0)", "usort (==1.0.6)"] +docs = ["sphinx (==6.1.3)", "sphinx-mdinclude (==0.5.3)"] [[package]] name = "amqp" @@ -256,7 +256,7 @@ typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} [package.extras] doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1)"] trio = ["trio (>=0.26.1)"] [[package]] @@ -412,8 +412,8 @@ files = [ six = ">=1.12.0" [package.extras] -astroid = ["astroid (>=1,<2) ; python_version < \"3\"", "astroid (>=2,<4) ; python_version >= \"3\""] -test = ["astroid (>=1,<2) ; python_version < \"3\"", "astroid (>=2,<4) ; python_version >= \"3\"", "pytest"] +astroid = ["astroid (>=1,<2)", "astroid (>=2,<4)"] +test = ["astroid (>=1,<2)", "astroid (>=2,<4)", "pytest"] [[package]] name = "async-lru" @@ -455,12 +455,12 @@ files = [ ] [package.extras] -benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] -cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] -dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\""] +tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "babel" @@ -607,7 +607,7 @@ typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"pypy\"", "aiohttp (>=3.7.4,!=3.9.0) ; sys_platform == \"win32\" and implementation_name == \"pypy\""] +d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -694,7 +694,7 @@ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} [package.extras] docs = ["furo (>=2023.08.17)", "sphinx (>=7.0,<8.0)", "sphinx-argparse-cli (>=1.5)", "sphinx-autodoc-typehints (>=1.10)", "sphinx-issues (>=3.0.0)"] -test = ["build[uv,virtualenv]", "filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0) ; python_version < \"3.10\"", "setuptools (>=56.0.0) ; python_version == \"3.10\"", "setuptools (>=56.0.0) ; python_version == \"3.11\"", "setuptools (>=67.8.0) ; python_version >= \"3.12\"", "wheel (>=0.36.0)"] +test = ["build[uv,virtualenv]", "filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0)", "setuptools (>=56.0.0)", "setuptools (>=56.0.0)", "setuptools (>=67.8.0)", "wheel (>=0.36.0)"] typing = ["build[uv]", "importlib-metadata (>=5.1)", "mypy (>=1.9.0,<1.10.0)", "tomli", "typing-extensions (>=3.7.4.3)"] uv = ["uv (>=0.1.18)"] virtualenv = ["virtualenv (>=20.0.35)"] @@ -740,32 +740,32 @@ vine = ">=5.1.0,<6.0" arangodb = ["pyArango (>=2.0.2)"] auth = ["cryptography (==42.0.5)"] azureblockblob = ["azure-storage-blob (>=12.15.0)"] -brotli = ["brotli (>=1.0.0) ; platform_python_implementation == \"CPython\"", "brotlipy (>=0.7.0) ; platform_python_implementation == \"PyPy\""] +brotli = ["brotli (>=1.0.0)", "brotlipy (>=0.7.0)"] cassandra = ["cassandra-driver (>=3.25.0,<4)"] consul = ["python-consul2 (==0.1.5)"] cosmosdbsql = ["pydocumentdb (==2.3.5)"] -couchbase = ["couchbase (>=3.0.0) ; platform_python_implementation != \"PyPy\" and (platform_system != \"Windows\" or python_version < \"3.10\")"] +couchbase = ["couchbase (>=3.0.0)"] couchdb = ["pycouchdb (==1.14.2)"] django = ["Django (>=2.2.28)"] dynamodb = ["boto3 (>=1.26.143)"] elasticsearch = ["elastic-transport (<=8.13.0)", "elasticsearch (<=8.13.0)"] -eventlet = ["eventlet (>=0.32.0) ; python_version < \"3.10\""] +eventlet = ["eventlet (>=0.32.0)"] gcs = ["google-cloud-storage (>=2.10.0)"] gevent = ["gevent (>=1.5.0)"] -librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] -memcache = ["pylibmc (==1.6.3) ; platform_system != \"Windows\""] +librabbitmq = ["librabbitmq (>=2.0.0)"] +memcache = ["pylibmc (==1.6.3)"] mongodb = ["pymongo[srv] (>=4.0.2)"] msgpack = ["msgpack (==1.0.8)"] pymemcache = ["python-memcached (>=1.61)"] -pyro = ["pyro4 (==4.82) ; python_version < \"3.11\""] +pyro = ["pyro4 (==4.82)"] pytest = ["pytest-celery[all] (>=1.0.0)"] redis = ["redis (>=4.5.2,!=4.5.5,<6.0.0)"] s3 = ["boto3 (>=1.26.143)"] slmq = ["softlayer-messaging (>=1.0.3)"] -solar = ["ephem (==4.1.5) ; platform_python_implementation != \"PyPy\""] +solar = ["ephem (==4.1.5)"] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] -sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.4)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"] -tblib = ["tblib (>=1.3.0) ; python_version < \"3.8.0\"", "tblib (>=1.5.0) ; python_version >= \"3.8.0\""] +sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.4)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] +tblib = ["tblib (>=1.3.0)", "tblib (>=1.5.0)"] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=1.3.1)"] zstd = ["zstandard (==0.22.0)"] @@ -1208,10 +1208,10 @@ files = [ ] [package.extras] -avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "requests"] -dev = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "flake8", "pytest (==4.6.4) ; python_version < \"3.0\"", "pytest ; python_version >= \"3.0\"", "pytest-timeout", "requests"] -doc = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "requests", "sphinx", "sphinx-rtd-theme"] -json = ["jsonschema", "pyrsistent (==0.16.1) ; python_version < \"3.0\"", "pyrsistent ; python_version > \"3.0\"", "requests"] +avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests"] +dev = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "flake8", "pytest", "pytest (==4.6.4)", "pytest-timeout", "requests"] +doc = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests", "sphinx", "sphinx-rtd-theme"] +json = ["jsonschema", "pyrsistent", "pyrsistent (==0.16.1)", "requests"] protobuf = ["protobuf", "requests"] schema-registry = ["requests"] @@ -1386,7 +1386,7 @@ files = [ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli ; python_full_version <= \"3.11.0a6\""] +toml = ["tomli"] [[package]] name = "cryptography" @@ -1476,8 +1476,8 @@ jinja2 = ">=2.10.1,<4.0" packaging = "*" pydantic = [ {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""}, - {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version == \"3.10\""}, - {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"4.0\""}, + {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.10.0,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.12\" and python_version < \"4.0\""}, ] pyyaml = ">=6.0.1" @@ -1522,7 +1522,7 @@ xxhash = "*" apache-beam = ["apache-beam (>=2.26.0)"] audio = ["librosa", "soundfile (>=0.12.1)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] @@ -1530,7 +1530,7 @@ quality = ["ruff (>=0.3.0)"] s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=6.2.1)"] @@ -1738,7 +1738,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version <= \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -1760,7 +1760,7 @@ files = [ ] [package.extras] -tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""] +tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] [[package]] name = "faker" @@ -1853,7 +1853,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] -typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] +typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "flatbuffers" @@ -1945,18 +1945,18 @@ files = [ ] [package.extras] -all = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\"", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0) ; python_version <= \"3.12\"", "xattr ; sys_platform == \"darwin\"", "zopfli (>=0.1.4)"] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] graphite = ["lz4 (>=1.7.4.2)"] -interpolatable = ["munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\""] +interpolatable = ["munkres", "pycairo", "scipy"] lxml = ["lxml (>=4.0)"] pathops = ["skia-pathops (>=0.5.0)"] plot = ["matplotlib"] repacker = ["uharfbuzz (>=0.23.0)"] symfont = ["sympy"] -type1 = ["xattr ; sys_platform == \"darwin\""] +type1 = ["xattr"] ufo = ["fs (>=2.2.0,<3)"] -unicode = ["unicodedata2 (>=15.1.0) ; python_version <= \"3.12\""] -woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] [[package]] name = "fqdn" @@ -2531,7 +2531,7 @@ idna = "*" sniffio = "*" [package.extras] -brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +brotli = ["brotli", "brotlicffi"] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -2740,7 +2740,7 @@ zipp = ">=0.5" [package.extras] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -test = ["flufl.flake8", "importlib-resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] [[package]] name = "importlib-resources" @@ -2759,7 +2759,7 @@ markers = {dev = "python_version < \"3.10\""} zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] @@ -2780,7 +2780,7 @@ files = [ [package.extras] docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"] -testing = ["pygments", "pytest (>=6)", "pytest-black (>=0.3.7) ; platform_python_implementation != \"PyPy\"", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1) ; platform_python_implementation != \"PyPy\""] +testing = ["pygments", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] [[package]] name = "iniconfig" @@ -3218,7 +3218,7 @@ traitlets = ">=5.3" [package.extras] docs = ["ipykernel", "myst-parser", "pydata-sphinx-theme", "sphinx (>=4)", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko ; sys_platform == \"win32\"", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] +test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] [[package]] name = "jupyter-console" @@ -3648,7 +3648,7 @@ azureservicebus = ["azure-servicebus (>=7.10.0)"] azurestoragequeues = ["azure-identity (>=1.12.0)", "azure-storage-queue (>=12.6.0)"] confluentkafka = ["confluent-kafka (>=2.2.0)"] consul = ["python-consul2 (==0.1.5)"] -librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] +librabbitmq = ["librabbitmq (>=2.0.0)"] mongodb = ["pymongo (>=4.1.1)"] msgpack = ["msgpack (==1.1.0)"] pyro = ["pyro4 (==4.82)"] @@ -3656,7 +3656,7 @@ qpid = ["qpid-python (>=0.26)", "qpid-tools (>=0.26)"] redis = ["redis (>=4.5.2,!=4.5.5,!=5.0.2)"] slmq = ["softlayer-messaging (>=1.0.3)"] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] -sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"] +sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=2.8.0)"] @@ -4191,7 +4191,7 @@ watchdog = ">=2.0" [package.extras] i18n = ["babel (>=2.9.0)"] -min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4) ; platform_system == \"Windows\"", "ghp-import (==1.0)", "importlib-metadata (==4.4) ; python_version < \"3.10\"", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] [[package]] name = "mkdocs-autorefs" @@ -4508,7 +4508,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] +gmpy = ["gmpy2 (>=2.1.0a4)"] tests = ["pytest (>=4.6)"] [[package]] @@ -4529,7 +4529,7 @@ PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.14,<0.18) ; python_version >= \"3.6\" and platform_system == \"Windows\"", "pymsalruntime (>=0.17,<0.18) ; python_version >= \"3.8\" and platform_system == \"Darwin\""] +broker = ["pymsalruntime (>=0.14,<0.18)", "pymsalruntime (>=0.17,<0.18)"] [[package]] name = "multidict" @@ -4817,7 +4817,7 @@ tornado = ">=6.2.0" [package.extras] dev = ["hatch", "pre-commit"] docs = ["myst-parser", "nbsphinx", "pydata-sphinx-theme", "sphinx (>=1.3.6)", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["importlib-resources (>=5.0) ; python_version < \"3.10\"", "ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[test] (>=2.27.1,<3)", "nbval", "pytest (>=7.0)", "pytest-console-scripts", "pytest-timeout", "pytest-tornasync", "requests"] +test = ["importlib-resources (>=5.0)", "ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[test] (>=2.27.1,<3)", "nbval", "pytest (>=7.0)", "pytest-console-scripts", "pytest-timeout", "pytest-tornasync", "requests"] [[package]] name = "notebook-shim" @@ -5524,7 +5524,7 @@ docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions ; python_version < \"3.10\""] +typing = ["typing-extensions"] xmp = ["defusedxml"] [[package]] @@ -5663,7 +5663,7 @@ files = [ ] [package.extras] -test = ["enum34 ; python_version <= \"3.4\"", "ipaddress ; python_version < \"3.0\"", "mock ; python_version < \"3.0\"", "pywin32 ; sys_platform == \"win32\"", "wmi ; sys_platform == \"win32\""] +test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] [[package]] name = "ptyprocess" @@ -5817,7 +5817,7 @@ typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata ; python_version >= \"3.9\" and sys_platform == \"win32\""] +timezone = ["tzdata"] [[package]] name = "pydantic-core" @@ -6063,6 +6063,22 @@ files = [ [package.extras] dev = ["build", "flake8", "mypy", "pytest", "twine"] +[[package]] +name = "pytesseract" +version = "0.3.13" +description = "Python-tesseract is a python wrapper for Google's Tesseract-OCR" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pytesseract-0.3.13-py3-none-any.whl", hash = "sha256:7a99c6c2ac598360693d83a416e36e0b33a67638bb9d77fdcac094a3589d4b34"}, + {file = "pytesseract-0.3.13.tar.gz", hash = "sha256:4bf5f880c99406f52a3cfc2633e42d9dc67615e69d8a509d74867d3baddb5db9"}, +] + +[package.dependencies] +packaging = ">=21.3" +Pillow = ">=8.0.0" + [[package]] name = "pytest" version = "7.4.4" @@ -7099,9 +7115,9 @@ files = [ ] [package.extras] -nativelib = ["pyobjc-framework-Cocoa ; sys_platform == \"darwin\"", "pywin32 ; sys_platform == \"win32\""] -objc = ["pyobjc-framework-Cocoa ; sys_platform == \"darwin\""] -win32 = ["pywin32 ; sys_platform == \"win32\""] +nativelib = ["pyobjc-framework-Cocoa", "pywin32"] +objc = ["pyobjc-framework-Cocoa"] +win32 = ["pywin32"] [[package]] name = "setuptools" @@ -7116,9 +7132,9 @@ files = [ ] [package.extras] -core = ["importlib-metadata (>=6) ; python_version < \"3.10\"", "importlib-resources (>=5.10.2) ; python_version < \"3.9\"", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-ruff (<0.4) ; platform_system == \"Windows\"", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "pytest-ruff (>=0.3.2) ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "shellingham" @@ -7518,7 +7534,7 @@ files = [ {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, ] -markers = {main = "python_version <= \"3.10\""} +markers = {main = "python_version < \"3.11\""} [[package]] name = "tomli" @@ -7527,7 +7543,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version <= \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, @@ -7820,8 +7836,8 @@ files = [ ] [package.extras] -brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] @@ -7844,12 +7860,12 @@ httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standar python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} [package.extras] -standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] [[package]] name = "uvloop" @@ -7858,7 +7874,7 @@ description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.8.0" groups = ["main"] -markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"" +markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\"" files = [ {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9ebafa0b96c62881d5cafa02d9da2e44c23f9f0cd829f3a32a6aff771449c996"}, {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:35968fc697b0527a06e134999eef859b4034b37aebca537daeb598b9d45a137b"}, @@ -7895,7 +7911,7 @@ files = [ [package.extras] docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0) ; python_version >= \"3.12\"", "aiohttp (>=3.8.1) ; python_version < \"3.12\"", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] +test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] [[package]] name = "vcrpy" @@ -7905,7 +7921,6 @@ optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "vcrpy-6.0.1-py2.py3-none-any.whl", hash = "sha256:621c3fb2d6bd8aa9f87532c688e4575bcbbde0c0afeb5ebdb7e14cac409edfdd"}, {file = "vcrpy-6.0.1.tar.gz", hash = "sha256:9e023fee7f892baa0bbda2f7da7c8ac51165c1c6e38ff8688683a12a4bde9278"}, ] @@ -8579,14 +8594,14 @@ files = [ markers = {dev = "python_version < \"3.10\""} [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] -test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.9.7 || >3.9.7,<3.13" -content-hash = "a10c0d67b278861c3b9c8f6172f9aadff4c0c02f9d0d8fc1d3720b3a6f0c2104" +content-hash = "72960a903c05976830225f572261c64d423402a2ef58317b2f339119c3c8f375" diff --git a/pyproject.toml b/pyproject.toml index 68418573..183d5ad8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ litellm = {url = "https://github.com/HumanSignal/litellm/archive/c0506d5844ef20d pandarallel = "^1.6.5" instructor = "^1.4.3" thefuzz = "0.22.1" +pytesseract = "^0.3.13" [tool.poetry.group.dev.dependencies] pytest = "^7.4.3" From 8a0f936f80a3eea471b9939ade71d99f34da8646 Mon Sep 17 00:00:00 2001 From: niklub Date: Mon, 17 Mar 2025 16:41:56 +0000 Subject: [PATCH 10/10] Sync Follow Merge dependencies Workflow run: https://github.com/HumanSignal/Adala/actions/runs/13904950905 --- poetry.lock | 145 +++++++++++++++++++++++++------------------------ pyproject.toml | 2 +- 2 files changed, 74 insertions(+), 73 deletions(-) diff --git a/poetry.lock b/poetry.lock index 581e267f..85b81803 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -123,7 +123,7 @@ multidict = ">=4.5,<7.0" yarl = ">=1.12.0,<2.0" [package.extras] -speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] +speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] [[package]] name = "aiokafka" @@ -206,8 +206,8 @@ files = [ ] [package.extras] -dev = ["aiounittest (==1.4.1)", "attribution (==1.6.2)", "black (==23.3.0)", "coverage[toml] (==7.2.3)", "flake8 (==5.0.4)", "flake8-bugbear (==23.3.12)", "flit (==3.7.1)", "mypy (==1.2.0)", "ufmt (==2.1.0)", "usort (==1.0.6)"] -docs = ["sphinx (==6.1.3)", "sphinx-mdinclude (==0.5.3)"] +dev = ["aiounittest (==1.4.1) ; python_version < \"3.8\"", "attribution (==1.6.2)", "black (==23.3.0)", "coverage[toml] (==7.2.3)", "flake8 (==5.0.4)", "flake8-bugbear (==23.3.12)", "flit (==3.7.1)", "mypy (==1.2.0)", "ufmt (==2.1.0)", "usort (==1.0.6)"] +docs = ["sphinx (==6.1.3) ; python_version >= \"3.8\"", "sphinx-mdinclude (==0.5.3)"] [[package]] name = "amqp" @@ -256,7 +256,7 @@ typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} [package.extras] doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1)"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""] trio = ["trio (>=0.26.1)"] [[package]] @@ -412,8 +412,8 @@ files = [ six = ">=1.12.0" [package.extras] -astroid = ["astroid (>=1,<2)", "astroid (>=2,<4)"] -test = ["astroid (>=1,<2)", "astroid (>=2,<4)", "pytest"] +astroid = ["astroid (>=1,<2) ; python_version < \"3\"", "astroid (>=2,<4) ; python_version >= \"3\""] +test = ["astroid (>=1,<2) ; python_version < \"3\"", "astroid (>=2,<4) ; python_version >= \"3\"", "pytest"] [[package]] name = "async-lru" @@ -455,12 +455,12 @@ files = [ ] [package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\""] [[package]] name = "babel" @@ -607,7 +607,7 @@ typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"pypy\"", "aiohttp (>=3.7.4,!=3.9.0) ; sys_platform == \"win32\" and implementation_name == \"pypy\""] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -694,7 +694,7 @@ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} [package.extras] docs = ["furo (>=2023.08.17)", "sphinx (>=7.0,<8.0)", "sphinx-argparse-cli (>=1.5)", "sphinx-autodoc-typehints (>=1.10)", "sphinx-issues (>=3.0.0)"] -test = ["build[uv,virtualenv]", "filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0)", "setuptools (>=56.0.0)", "setuptools (>=56.0.0)", "setuptools (>=67.8.0)", "wheel (>=0.36.0)"] +test = ["build[uv,virtualenv]", "filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0) ; python_version < \"3.10\"", "setuptools (>=56.0.0) ; python_version == \"3.10\"", "setuptools (>=56.0.0) ; python_version == \"3.11\"", "setuptools (>=67.8.0) ; python_version >= \"3.12\"", "wheel (>=0.36.0)"] typing = ["build[uv]", "importlib-metadata (>=5.1)", "mypy (>=1.9.0,<1.10.0)", "tomli", "typing-extensions (>=3.7.4.3)"] uv = ["uv (>=0.1.18)"] virtualenv = ["virtualenv (>=20.0.35)"] @@ -740,32 +740,32 @@ vine = ">=5.1.0,<6.0" arangodb = ["pyArango (>=2.0.2)"] auth = ["cryptography (==42.0.5)"] azureblockblob = ["azure-storage-blob (>=12.15.0)"] -brotli = ["brotli (>=1.0.0)", "brotlipy (>=0.7.0)"] +brotli = ["brotli (>=1.0.0) ; platform_python_implementation == \"CPython\"", "brotlipy (>=0.7.0) ; platform_python_implementation == \"PyPy\""] cassandra = ["cassandra-driver (>=3.25.0,<4)"] consul = ["python-consul2 (==0.1.5)"] cosmosdbsql = ["pydocumentdb (==2.3.5)"] -couchbase = ["couchbase (>=3.0.0)"] +couchbase = ["couchbase (>=3.0.0) ; platform_python_implementation != \"PyPy\" and (platform_system != \"Windows\" or python_version < \"3.10\")"] couchdb = ["pycouchdb (==1.14.2)"] django = ["Django (>=2.2.28)"] dynamodb = ["boto3 (>=1.26.143)"] elasticsearch = ["elastic-transport (<=8.13.0)", "elasticsearch (<=8.13.0)"] -eventlet = ["eventlet (>=0.32.0)"] +eventlet = ["eventlet (>=0.32.0) ; python_version < \"3.10\""] gcs = ["google-cloud-storage (>=2.10.0)"] gevent = ["gevent (>=1.5.0)"] -librabbitmq = ["librabbitmq (>=2.0.0)"] -memcache = ["pylibmc (==1.6.3)"] +librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] +memcache = ["pylibmc (==1.6.3) ; platform_system != \"Windows\""] mongodb = ["pymongo[srv] (>=4.0.2)"] msgpack = ["msgpack (==1.0.8)"] pymemcache = ["python-memcached (>=1.61)"] -pyro = ["pyro4 (==4.82)"] +pyro = ["pyro4 (==4.82) ; python_version < \"3.11\""] pytest = ["pytest-celery[all] (>=1.0.0)"] redis = ["redis (>=4.5.2,!=4.5.5,<6.0.0)"] s3 = ["boto3 (>=1.26.143)"] slmq = ["softlayer-messaging (>=1.0.3)"] -solar = ["ephem (==4.1.5)"] +solar = ["ephem (==4.1.5) ; platform_python_implementation != \"PyPy\""] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] -sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.4)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] -tblib = ["tblib (>=1.3.0)", "tblib (>=1.5.0)"] +sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.4)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"] +tblib = ["tblib (>=1.3.0) ; python_version < \"3.8.0\"", "tblib (>=1.5.0) ; python_version >= \"3.8.0\""] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=1.3.1)"] zstd = ["zstandard (==0.22.0)"] @@ -1208,10 +1208,10 @@ files = [ ] [package.extras] -avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests"] -dev = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "flake8", "pytest", "pytest (==4.6.4)", "pytest-timeout", "requests"] -doc = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests", "sphinx", "sphinx-rtd-theme"] -json = ["jsonschema", "pyrsistent", "pyrsistent (==0.16.1)", "requests"] +avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "requests"] +dev = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "flake8", "pytest (==4.6.4) ; python_version < \"3.0\"", "pytest ; python_version >= \"3.0\"", "pytest-timeout", "requests"] +doc = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "requests", "sphinx", "sphinx-rtd-theme"] +json = ["jsonschema", "pyrsistent (==0.16.1) ; python_version < \"3.0\"", "pyrsistent ; python_version > \"3.0\"", "requests"] protobuf = ["protobuf", "requests"] schema-registry = ["requests"] @@ -1386,7 +1386,7 @@ files = [ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli"] +toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "cryptography" @@ -1476,8 +1476,8 @@ jinja2 = ">=2.10.1,<4.0" packaging = "*" pydantic = [ {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""}, - {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, + {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version == \"3.10\""}, + {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"4.0\""}, {version = ">=1.10.0,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.12\" and python_version < \"4.0\""}, ] pyyaml = ">=6.0.1" @@ -1522,7 +1522,7 @@ xxhash = "*" apache-beam = ["apache-beam (>=2.26.0)"] audio = ["librosa", "soundfile (>=0.12.1)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] @@ -1530,7 +1530,7 @@ quality = ["ruff (>=0.3.0)"] s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=6.2.1)"] @@ -1738,7 +1738,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version < \"3.11\"" +markers = "python_version <= \"3.10\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -1760,7 +1760,7 @@ files = [ ] [package.extras] -tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] +tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""] [[package]] name = "faker" @@ -1853,7 +1853,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] -typing = ["typing-extensions (>=4.12.2)"] +typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] [[package]] name = "flatbuffers" @@ -1945,18 +1945,18 @@ files = [ ] [package.extras] -all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +all = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\"", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0) ; python_version <= \"3.12\"", "xattr ; sys_platform == \"darwin\"", "zopfli (>=0.1.4)"] graphite = ["lz4 (>=1.7.4.2)"] -interpolatable = ["munkres", "pycairo", "scipy"] +interpolatable = ["munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\""] lxml = ["lxml (>=4.0)"] pathops = ["skia-pathops (>=0.5.0)"] plot = ["matplotlib"] repacker = ["uharfbuzz (>=0.23.0)"] symfont = ["sympy"] -type1 = ["xattr"] +type1 = ["xattr ; sys_platform == \"darwin\""] ufo = ["fs (>=2.2.0,<3)"] -unicode = ["unicodedata2 (>=15.1.0)"] -woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] +unicode = ["unicodedata2 (>=15.1.0) ; python_version <= \"3.12\""] +woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"] [[package]] name = "fqdn" @@ -2531,7 +2531,7 @@ idna = "*" sniffio = "*" [package.extras] -brotli = ["brotli", "brotlicffi"] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -2740,7 +2740,7 @@ zipp = ">=0.5" [package.extras] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] +test = ["flufl.flake8", "importlib-resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] [[package]] name = "importlib-resources" @@ -2759,7 +2759,7 @@ markers = {dev = "python_version < \"3.10\""} zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] @@ -2780,7 +2780,7 @@ files = [ [package.extras] docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"] -testing = ["pygments", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] +testing = ["pygments", "pytest (>=6)", "pytest-black (>=0.3.7) ; platform_python_implementation != \"PyPy\"", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1) ; platform_python_implementation != \"PyPy\""] [[package]] name = "iniconfig" @@ -3218,7 +3218,7 @@ traitlets = ">=5.3" [package.extras] docs = ["ipykernel", "myst-parser", "pydata-sphinx-theme", "sphinx (>=4)", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] +test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko ; sys_platform == \"win32\"", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] [[package]] name = "jupyter-console" @@ -3648,7 +3648,7 @@ azureservicebus = ["azure-servicebus (>=7.10.0)"] azurestoragequeues = ["azure-identity (>=1.12.0)", "azure-storage-queue (>=12.6.0)"] confluentkafka = ["confluent-kafka (>=2.2.0)"] consul = ["python-consul2 (==0.1.5)"] -librabbitmq = ["librabbitmq (>=2.0.0)"] +librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] mongodb = ["pymongo (>=4.1.1)"] msgpack = ["msgpack (==1.1.0)"] pyro = ["pyro4 (==4.82)"] @@ -3656,7 +3656,7 @@ qpid = ["qpid-python (>=0.26)", "qpid-tools (>=0.26)"] redis = ["redis (>=4.5.2,!=4.5.5,!=5.0.2)"] slmq = ["softlayer-messaging (>=1.0.3)"] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] -sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] +sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=2.8.0)"] @@ -3696,7 +3696,7 @@ optional = false python-versions = ">=3.9,<4" groups = ["main"] files = [ - {file = "e3f6c7061c8798c91fce779ec7f4571e985e3810.zip", hash = "sha256:d89db23bfc023a38c8605fe3d661a2e21dee0522541d8ebca73be783ab9219a6"}, + {file = "2b0d2cc9403c83cd99dc3e6cc06a580d9e9db4f1.zip", hash = "sha256:118574bddf431edf96d564f55173f6e0f104d233c1ebc3dac8ca68decd4fc6f4"}, ] [package.dependencies] @@ -3722,7 +3722,7 @@ xmljson = "0.2.1" [package.source] type = "url" -url = "https://github.com/HumanSignal/label-studio-sdk/archive/e3f6c7061c8798c91fce779ec7f4571e985e3810.zip" +url = "https://github.com/HumanSignal/label-studio-sdk/archive/2b0d2cc9403c83cd99dc3e6cc06a580d9e9db4f1.zip" [[package]] name = "litellm" @@ -4191,7 +4191,7 @@ watchdog = ">=2.0" [package.extras] i18n = ["babel (>=2.9.0)"] -min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4) ; platform_system == \"Windows\"", "ghp-import (==1.0)", "importlib-metadata (==4.4) ; python_version < \"3.10\"", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] [[package]] name = "mkdocs-autorefs" @@ -4508,7 +4508,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] +gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] [[package]] @@ -4529,7 +4529,7 @@ PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.14,<0.18)", "pymsalruntime (>=0.17,<0.18)"] +broker = ["pymsalruntime (>=0.14,<0.18) ; python_version >= \"3.6\" and platform_system == \"Windows\"", "pymsalruntime (>=0.17,<0.18) ; python_version >= \"3.8\" and platform_system == \"Darwin\""] [[package]] name = "multidict" @@ -4817,7 +4817,7 @@ tornado = ">=6.2.0" [package.extras] dev = ["hatch", "pre-commit"] docs = ["myst-parser", "nbsphinx", "pydata-sphinx-theme", "sphinx (>=1.3.6)", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["importlib-resources (>=5.0)", "ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[test] (>=2.27.1,<3)", "nbval", "pytest (>=7.0)", "pytest-console-scripts", "pytest-timeout", "pytest-tornasync", "requests"] +test = ["importlib-resources (>=5.0) ; python_version < \"3.10\"", "ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[test] (>=2.27.1,<3)", "nbval", "pytest (>=7.0)", "pytest-console-scripts", "pytest-timeout", "pytest-tornasync", "requests"] [[package]] name = "notebook-shim" @@ -5524,7 +5524,7 @@ docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions"] +typing = ["typing-extensions ; python_version < \"3.10\""] xmp = ["defusedxml"] [[package]] @@ -5663,7 +5663,7 @@ files = [ ] [package.extras] -test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] +test = ["enum34 ; python_version <= \"3.4\"", "ipaddress ; python_version < \"3.0\"", "mock ; python_version < \"3.0\"", "pywin32 ; sys_platform == \"win32\"", "wmi ; sys_platform == \"win32\""] [[package]] name = "ptyprocess" @@ -5817,7 +5817,7 @@ typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata"] +timezone = ["tzdata ; python_version >= \"3.9\" and sys_platform == \"win32\""] [[package]] name = "pydantic-core" @@ -7115,9 +7115,9 @@ files = [ ] [package.extras] -nativelib = ["pyobjc-framework-Cocoa", "pywin32"] -objc = ["pyobjc-framework-Cocoa"] -win32 = ["pywin32"] +nativelib = ["pyobjc-framework-Cocoa ; sys_platform == \"darwin\"", "pywin32 ; sys_platform == \"win32\""] +objc = ["pyobjc-framework-Cocoa ; sys_platform == \"darwin\""] +win32 = ["pywin32 ; sys_platform == \"win32\""] [[package]] name = "setuptools" @@ -7132,9 +7132,9 @@ files = [ ] [package.extras] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6) ; python_version < \"3.10\"", "importlib-resources (>=5.10.2) ; python_version < \"3.9\"", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-ruff (<0.4) ; platform_system == \"Windows\"", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "pytest-ruff (>=0.3.2) ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "shellingham" @@ -7534,7 +7534,7 @@ files = [ {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, ] -markers = {main = "python_version < \"3.11\""} +markers = {main = "python_version <= \"3.10\""} [[package]] name = "tomli" @@ -7543,7 +7543,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version < \"3.11\"" +markers = "python_version <= \"3.10\"" files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, @@ -7836,8 +7836,8 @@ files = [ ] [package.extras] -brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] @@ -7860,12 +7860,12 @@ httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standar python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} [package.extras] -standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] [[package]] name = "uvloop" @@ -7874,7 +7874,7 @@ description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.8.0" groups = ["main"] -markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\"" +markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"" files = [ {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9ebafa0b96c62881d5cafa02d9da2e44c23f9f0cd829f3a32a6aff771449c996"}, {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:35968fc697b0527a06e134999eef859b4034b37aebca537daeb598b9d45a137b"}, @@ -7911,7 +7911,7 @@ files = [ [package.extras] docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] +test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0) ; python_version >= \"3.12\"", "aiohttp (>=3.8.1) ; python_version < \"3.12\"", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] [[package]] name = "vcrpy" @@ -7921,6 +7921,7 @@ optional = false python-versions = ">=3.8" groups = ["dev"] files = [ + {file = "vcrpy-6.0.1-py2.py3-none-any.whl", hash = "sha256:621c3fb2d6bd8aa9f87532c688e4575bcbbde0c0afeb5ebdb7e14cac409edfdd"}, {file = "vcrpy-6.0.1.tar.gz", hash = "sha256:9e023fee7f892baa0bbda2f7da7c8ac51165c1c6e38ff8688683a12a4bde9278"}, ] @@ -8594,14 +8595,14 @@ files = [ markers = {dev = "python_version < \"3.10\""} [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] -test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.9.7 || >3.9.7,<3.13" -content-hash = "72960a903c05976830225f572261c64d423402a2ef58317b2f339119c3c8f375" +content-hash = "167ce58f336fb164546c14a06fe12ae870266918f4290ab8b41d31e4b72d8307" diff --git a/pyproject.toml b/pyproject.toml index 183d5ad8..d230b4a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ celery = {version = "^5.3.6", extras = ["redis"]} kombu = ">=5.4.0rc2" # Pin version to fix https://github.com/celery/celery/issues/8030. TODO: remove when this fix will be included in celery uvicorn = "*" pydantic-settings = "^2.2.1" -label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/e3f6c7061c8798c91fce779ec7f4571e985e3810.zip"} +label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/2b0d2cc9403c83cd99dc3e6cc06a580d9e9db4f1.zip"} kafka-python-ng = "^2.2.3" requests = "^2.32.0" # Using litellm from forked repo until vertex fix is released: https://github.com/BerriAI/litellm/issues/7904