|
7 | 7 |
|
8 | 8 | @dataclasses.dataclass
|
9 | 9 | class Files:
|
10 |
| - content: bytes = dataclasses.field(metadata={'multipart_form': { 'content': True }}) |
11 |
| - file_name: str = dataclasses.field(metadata={'multipart_form': { 'field_name': 'files' }}) |
12 |
| - |
13 |
| - |
| 10 | + content: bytes = dataclasses.field(metadata={"multipart_form": {"content": True}}) |
| 11 | + file_name: str = dataclasses.field( |
| 12 | + metadata={"multipart_form": {"field_name": "files"}} |
| 13 | + ) |
14 | 14 |
|
15 | 15 |
|
16 | 16 | @dataclasses.dataclass
|
17 | 17 | class PartitionParameters:
|
18 |
| - chunking_strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'chunking_strategy' }}) |
| 18 | + chunking_strategy: Optional[str] = dataclasses.field( |
| 19 | + default=None, metadata={"multipart_form": {"field_name": "chunking_strategy"}} |
| 20 | + ) |
19 | 21 | r"""Use one of the supported strategies to chunk the returned elements. Currently supports: by_title"""
|
20 |
| - combine_under_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'combine_under_n_chars' }}) |
| 22 | + combine_under_n_chars: Optional[int] = dataclasses.field( |
| 23 | + default=None, |
| 24 | + metadata={"multipart_form": {"field_name": "combine_under_n_chars"}}, |
| 25 | + ) |
21 | 26 | r"""If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500"""
|
22 |
| - coordinates: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'coordinates' }}) |
| 27 | + coordinates: Optional[bool] = dataclasses.field( |
| 28 | + default=None, metadata={"multipart_form": {"field_name": "coordinates"}} |
| 29 | + ) |
23 | 30 | r"""If true, return coordinates for each element. Default: false"""
|
24 |
| - encoding: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'encoding' }}) |
| 31 | + encoding: Optional[str] = dataclasses.field( |
| 32 | + default=None, metadata={"multipart_form": {"field_name": "encoding"}} |
| 33 | + ) |
25 | 34 | r"""The encoding method used to decode the text input. Default: utf-8"""
|
26 |
| - files: Optional[Files] = dataclasses.field(default=None, metadata={'multipart_form': { 'file': True }}) |
| 35 | + files: Optional[Files] = dataclasses.field( |
| 36 | + default=None, metadata={"multipart_form": {"file": True}} |
| 37 | + ) |
27 | 38 | r"""The file to extract"""
|
28 |
| - gz_uncompressed_content_type: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'gz_uncompressed_content_type' }}) |
| 39 | + gz_uncompressed_content_type: Optional[str] = dataclasses.field( |
| 40 | + default=None, |
| 41 | + metadata={"multipart_form": {"field_name": "gz_uncompressed_content_type"}}, |
| 42 | + ) |
29 | 43 | r"""If file is gzipped, use this content type after unzipping"""
|
30 |
| - hi_res_model_name: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'hi_res_model_name' }}) |
| 44 | + hi_res_model_name: Optional[str] = dataclasses.field( |
| 45 | + default=None, metadata={"multipart_form": {"field_name": "hi_res_model_name"}} |
| 46 | + ) |
31 | 47 | r"""The name of the inference model used when strategy is hi_res"""
|
32 |
| - include_page_breaks: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'include_page_breaks' }}) |
| 48 | + include_page_breaks: Optional[bool] = dataclasses.field( |
| 49 | + default=None, metadata={"multipart_form": {"field_name": "include_page_breaks"}} |
| 50 | + ) |
33 | 51 | r"""If True, the output will include page breaks if the filetype supports it. Default: false"""
|
34 |
| - languages: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'languages' }}) |
| 52 | + languages: Optional[List[str]] = dataclasses.field( |
| 53 | + default=None, metadata={"multipart_form": {"field_name": "languages"}} |
| 54 | + ) |
35 | 55 | r"""The languages present in the document, for use in partitioning and/or OCR"""
|
36 |
| - max_characters: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'max_characters' }}) |
| 56 | + max_characters: Optional[int] = dataclasses.field( |
| 57 | + default=None, metadata={"multipart_form": {"field_name": "max_characters"}} |
| 58 | + ) |
37 | 59 | r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500"""
|
38 |
| - multipage_sections: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'multipage_sections' }}) |
| 60 | + multipage_sections: Optional[bool] = dataclasses.field( |
| 61 | + default=None, metadata={"multipart_form": {"field_name": "multipage_sections"}} |
| 62 | + ) |
39 | 63 | r"""If chunking strategy is set, determines if sections can span multiple pages. Default: true"""
|
40 |
| - new_after_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'new_after_n_chars' }}) |
| 64 | + new_after_n_chars: Optional[int] = dataclasses.field( |
| 65 | + default=None, metadata={"multipart_form": {"field_name": "new_after_n_chars"}} |
| 66 | + ) |
41 | 67 | r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500"""
|
42 |
| - output_format: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'output_format' }}) |
| 68 | + output_format: Optional[str] = dataclasses.field( |
| 69 | + default=None, metadata={"multipart_form": {"field_name": "output_format"}} |
| 70 | + ) |
43 | 71 | r"""The format of the response. Supported formats are application/json and text/csv. Default: application/json."""
|
44 |
| - pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }}) |
| 72 | + pdf_infer_table_structure: Optional[bool] = dataclasses.field( |
| 73 | + default=None, |
| 74 | + metadata={"multipart_form": {"field_name": "pdf_infer_table_structure"}}, |
| 75 | + ) |
45 | 76 | r"""If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is just a transformation of the data into an HTML <table>."""
|
46 |
| - skip_infer_table_types: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'skip_infer_table_types' }}) |
| 77 | + skip_infer_table_types: Optional[List[str]] = dataclasses.field( |
| 78 | + default=None, |
| 79 | + metadata={"multipart_form": {"field_name": "skip_infer_table_types"}}, |
| 80 | + ) |
47 | 81 | r"""The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png']"""
|
48 |
| - strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'strategy' }}) |
| 82 | + strategy: Optional[str] = dataclasses.field( |
| 83 | + default=None, metadata={"multipart_form": {"field_name": "strategy"}} |
| 84 | + ) |
49 | 85 | r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto"""
|
50 |
| - xml_keep_tags: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'xml_keep_tags' }}) |
| 86 | + xml_keep_tags: Optional[bool] = dataclasses.field( |
| 87 | + default=None, metadata={"multipart_form": {"field_name": "xml_keep_tags"}} |
| 88 | + ) |
51 | 89 | r"""If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml."""
|
52 |
| - |
53 |
| - |
|
0 commit comments