|
7 | 7 |
|
8 | 8 | @dataclasses.dataclass
|
9 | 9 | class Files:
|
10 |
| - content: bytes = dataclasses.field(metadata={"multipart_form": {"content": True}}) |
11 |
| - file_name: str = dataclasses.field( |
12 |
| - metadata={"multipart_form": {"field_name": "files"}} |
13 |
| - ) |
| 10 | + content: bytes = dataclasses.field(metadata={'multipart_form': { 'content': True }}) |
| 11 | + file_name: str = dataclasses.field(metadata={'multipart_form': { 'field_name': 'files' }}) |
| 12 | + |
| 13 | + |
14 | 14 |
|
15 | 15 |
|
16 | 16 | @dataclasses.dataclass
|
17 | 17 | class PartitionParameters:
|
18 |
| - chunking_strategy: Optional[str] = dataclasses.field( |
19 |
| - default=None, metadata={"multipart_form": {"field_name": "chunking_strategy"}} |
20 |
| - ) |
| 18 | + chunking_strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'chunking_strategy' }}) |
21 | 19 | r"""Use one of the supported strategies to chunk the returned elements. Currently supports: by_title"""
|
22 |
| - combine_under_n_chars: Optional[int] = dataclasses.field( |
23 |
| - default=None, |
24 |
| - metadata={"multipart_form": {"field_name": "combine_under_n_chars"}}, |
25 |
| - ) |
| 20 | + combine_under_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'combine_under_n_chars' }}) |
26 | 21 | r"""If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500"""
|
27 |
| - coordinates: Optional[bool] = dataclasses.field( |
28 |
| - default=None, metadata={"multipart_form": {"field_name": "coordinates"}} |
29 |
| - ) |
| 22 | + coordinates: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'coordinates' }}) |
30 | 23 | r"""If true, return coordinates for each element. Default: false"""
|
31 |
| - encoding: Optional[str] = dataclasses.field( |
32 |
| - default=None, metadata={"multipart_form": {"field_name": "encoding"}} |
33 |
| - ) |
| 24 | + encoding: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'encoding' }}) |
34 | 25 | r"""The encoding method used to decode the text input. Default: utf-8"""
|
35 |
| - files: Optional[Files] = dataclasses.field( |
36 |
| - default=None, metadata={"multipart_form": {"file": True}} |
37 |
| - ) |
| 26 | + files: Optional[Files] = dataclasses.field(default=None, metadata={'multipart_form': { 'file': True }}) |
38 | 27 | r"""The file to extract"""
|
39 |
| - gz_uncompressed_content_type: Optional[str] = dataclasses.field( |
40 |
| - default=None, |
41 |
| - metadata={"multipart_form": {"field_name": "gz_uncompressed_content_type"}}, |
42 |
| - ) |
| 28 | + gz_uncompressed_content_type: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'gz_uncompressed_content_type' }}) |
43 | 29 | r"""If file is gzipped, use this content type after unzipping"""
|
44 |
| - hi_res_model_name: Optional[str] = dataclasses.field( |
45 |
| - default=None, metadata={"multipart_form": {"field_name": "hi_res_model_name"}} |
46 |
| - ) |
| 30 | + hi_res_model_name: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'hi_res_model_name' }}) |
47 | 31 | r"""The name of the inference model used when strategy is hi_res"""
|
48 |
| - include_page_breaks: Optional[bool] = dataclasses.field( |
49 |
| - default=None, metadata={"multipart_form": {"field_name": "include_page_breaks"}} |
50 |
| - ) |
| 32 | + include_page_breaks: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'include_page_breaks' }}) |
51 | 33 | r"""If True, the output will include page breaks if the filetype supports it. Default: false"""
|
52 |
| - languages: Optional[List[str]] = dataclasses.field( |
53 |
| - default=None, metadata={"multipart_form": {"field_name": "languages"}} |
54 |
| - ) |
| 34 | + languages: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'languages' }}) |
55 | 35 | r"""The languages present in the document, for use in partitioning and/or OCR"""
|
56 |
| - max_characters: Optional[int] = dataclasses.field( |
57 |
| - default=None, metadata={"multipart_form": {"field_name": "max_characters"}} |
58 |
| - ) |
| 36 | + max_characters: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'max_characters' }}) |
59 | 37 | r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500"""
|
60 |
| - multipage_sections: Optional[bool] = dataclasses.field( |
61 |
| - default=None, metadata={"multipart_form": {"field_name": "multipage_sections"}} |
62 |
| - ) |
| 38 | + multipage_sections: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'multipage_sections' }}) |
63 | 39 | r"""If chunking strategy is set, determines if sections can span multiple pages. Default: true"""
|
64 |
| - new_after_n_chars: Optional[int] = dataclasses.field( |
65 |
| - default=None, metadata={"multipart_form": {"field_name": "new_after_n_chars"}} |
66 |
| - ) |
| 40 | + new_after_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'new_after_n_chars' }}) |
67 | 41 | r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500"""
|
68 |
| - output_format: Optional[str] = dataclasses.field( |
69 |
| - default=None, metadata={"multipart_form": {"field_name": "output_format"}} |
70 |
| - ) |
| 42 | + output_format: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'output_format' }}) |
71 | 43 | r"""The format of the response. Supported formats are application/json and text/csv. Default: application/json."""
|
72 |
| - pdf_infer_table_structure: Optional[bool] = dataclasses.field( |
73 |
| - default=None, |
74 |
| - metadata={"multipart_form": {"field_name": "pdf_infer_table_structure"}}, |
75 |
| - ) |
| 44 | + pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }}) |
76 | 45 | r"""If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is just a transformation of the data into an HTML <table>."""
|
77 |
| - skip_infer_table_types: Optional[List[str]] = dataclasses.field( |
78 |
| - default=None, |
79 |
| - metadata={"multipart_form": {"field_name": "skip_infer_table_types"}}, |
80 |
| - ) |
| 46 | + skip_infer_table_types: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'skip_infer_table_types' }}) |
81 | 47 | r"""The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png']"""
|
82 |
| - strategy: Optional[str] = dataclasses.field( |
83 |
| - default=None, metadata={"multipart_form": {"field_name": "strategy"}} |
84 |
| - ) |
| 48 | + strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'strategy' }}) |
85 | 49 | r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto"""
|
86 |
| - xml_keep_tags: Optional[bool] = dataclasses.field( |
87 |
| - default=None, metadata={"multipart_form": {"field_name": "xml_keep_tags"}} |
88 |
| - ) |
| 50 | + xml_keep_tags: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'xml_keep_tags' }}) |
89 | 51 | r"""If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml."""
|
| 52 | + |
| 53 | + |
0 commit comments