2
2
3
3
import tempfile
4
4
from pathlib import Path
5
+ from typing import Literal
5
6
6
7
import httpx
7
8
import json
15
16
from unstructured_client import UnstructuredClient
16
17
from unstructured_client .models import shared , operations
17
18
from unstructured_client .models .errors import HTTPValidationError
19
+ from unstructured_client .models .shared .partition_parameters import Strategy
18
20
from unstructured_client .utils .retries import BackoffStrategy , RetryConfig
19
21
from unstructured_client ._hooks .custom import form_utils
20
22
from unstructured_client ._hooks .custom import split_pdf_hook
@@ -105,19 +107,22 @@ def test_integration_split_pdf_has_same_output_as_non_split(
105
107
)
106
108
assert len (diff ) == 0
107
109
108
- @ pytest . mark . parametrize ( ( "filename" , "expected_ok" , "strategy" ), [
109
- ( "_sample_docs/layout-parser-paper.pdf" , True , "hi_res" ), # 16
110
- ] # pages
111
- )
112
- @pytest .mark .parametrize ( ("use_caching" , "cache_dir" ), [
110
+
111
+ @ pytest . mark . parametrize (( "filename" , "expected_ok" , "strategy" ), [
112
+ ( "_sample_docs/layout-parser-paper.pdf" , True , shared . Strategy . HI_RES ), # 16 pages
113
+ ] )
114
+ @pytest .mark .parametrize (("use_caching" , "cache_dir" ), [
113
115
(True , None ), # Use default cache dir
114
116
(True , Path (tempfile .gettempdir ()) / "test_integration_unstructured_client1" ), # Use custom cache dir
115
117
(False , None ), # Don't use caching
116
118
(False , Path (tempfile .gettempdir ()) / "test_integration_unstructured_client2" ), # Don't use caching, use custom cache dir
117
119
])
118
120
def test_integration_split_pdf_with_caching (
119
- filename : str , expected_ok : bool , strategy : str , use_caching : bool ,
120
- cache_dir : Path | None
121
+ filename : str ,
122
+ expected_ok : bool ,
123
+ strategy : Literal [Strategy .HI_RES ],
124
+ use_caching : bool ,
125
+ cache_dir : Path | None ,
121
126
):
122
127
try :
123
128
response = requests .get ("http://localhost:8000/general/docs" )
@@ -140,10 +145,9 @@ def test_integration_split_pdf_with_caching(
140
145
parameters = shared .PartitionParameters (
141
146
files = files ,
142
147
strategy = strategy ,
143
- languages = ["eng" ],
144
148
split_pdf_page = True ,
145
149
split_pdf_cache_tmp_data = use_caching ,
146
- split_pdf_cache_dir = cache_dir ,
150
+ split_pdf_cache_tmp_data_dir = str ( cache_dir ) ,
147
151
)
148
152
149
153
req = operations .PartitionRequest (
@@ -185,6 +189,7 @@ def test_integration_split_pdf_with_caching(
185
189
if cache_dir :
186
190
assert not Path (cache_dir ).exists ()
187
191
192
+
188
193
@pytest .mark .parametrize ("filename" , ["_sample_docs/super_long_pages.pdf" ])
189
194
def test_long_pages_hi_res (filename ):
190
195
req = operations .PartitionRequest (partition_parameters = shared .PartitionParameters (
0 commit comments