Skip to content

Commit 1c35ccf

Browse files
committed
chore: update for dataset rewrite
Signed-off-by: Grant Linville <[email protected]>
1 parent e9f3b2f commit 1c35ccf

File tree

4 files changed: 53 additions and 124 deletions.

gptscript/datasets.py

Lines changed: 5 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import base64
2-
from typing import Dict
32
from pydantic import BaseModel, field_serializer, field_validator, BeforeValidator
43

54

@@ -10,28 +9,17 @@ class DatasetElementMeta(BaseModel):
109

1110
class DatasetElement(BaseModel):
1211
name: str
13-
description: str
14-
contents: bytes
12+
description: str = ""
13+
contents: str = ""
14+
binaryContents: bytes = b""
1515

16-
@field_serializer("contents")
16+
@field_serializer("binaryContents")
1717
def serialize_contents(self, value: bytes) -> str:
1818
return base64.b64encode(value).decode("utf-8")
1919

20-
@field_validator("contents", mode="before")
20+
@field_validator("binaryContents", mode="before")
2121
def deserialize_contents(cls, value) -> bytes:
2222
if isinstance(value, str):
2323
return base64.b64decode(value)
2424
return value
2525

26-
27-
class DatasetMeta(BaseModel):
28-
id: str
29-
name: str
30-
description: str
31-
32-
33-
class Dataset(BaseModel):
34-
id: str
35-
name: str
36-
description: str
37-
elements: Dict[str, DatasetElementMeta]

gptscript/gptscript.py

Lines changed: 23 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
from gptscript.confirm import AuthResponse
1010
from gptscript.credentials import Credential, to_credential
11-
from gptscript.datasets import DatasetMeta, Dataset, DatasetElementMeta, DatasetElement
11+
from gptscript.datasets import DatasetElementMeta, DatasetElement
1212
from gptscript.fileinfo import FileInfo
1313
from gptscript.frame import RunFrame, CallFrame, PromptFrame, Program
1414
from gptscript.opts import GlobalOptions
@@ -213,109 +213,54 @@ async def delete_credential(self, context: str = "default", name: str = "") -> s
213213
{"context": [context], "name": name}
214214
)
215215

216-
async def list_datasets(self, workspace_id: str) -> List[DatasetMeta]:
217-
if workspace_id == "":
218-
workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
219-
216+
# list_datasets returns an array of dataset IDs
217+
async def list_datasets(self) -> List[str]:
220218
res = await self._run_basic_command(
221219
"datasets",
222-
{"input": "{}", "workspaceID": workspace_id, "datasetToolRepo": self.opts.DatasetToolRepo,
223-
"env": self.opts.Env}
224-
)
225-
return [DatasetMeta.model_validate(d) for d in json.loads(res)]
226-
227-
async def create_dataset(self, workspace_id: str, name: str, description: str = "") -> Dataset:
228-
if workspace_id == "":
229-
workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
230-
231-
if name == "":
232-
raise ValueError("name cannot be empty")
233-
234-
res = await self._run_basic_command(
235-
"datasets/create",
236220
{
237-
"input": json.dumps({"datasetName": name, "datasetDescription": description}),
238-
"workspaceID": workspace_id,
239-
"datasetToolRepo": self.opts.DatasetToolRepo,
240-
"env": self.opts.Env,
241-
}
242-
)
243-
return Dataset.model_validate_json(res)
244-
245-
async def add_dataset_element(self, workspace_id: str, datasetID: str, elementName: str, elementContent: bytes,
246-
elementDescription: str = "") -> DatasetElementMeta:
247-
if workspace_id == "":
248-
workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
249-
250-
if datasetID == "":
251-
raise ValueError("datasetID cannot be empty")
252-
elif elementName == "":
253-
raise ValueError("elementName cannot be empty")
254-
elif not elementContent:
255-
raise ValueError("elementContent cannot be empty")
256-
257-
res = await self._run_basic_command(
258-
"datasets/add-element",
259-
{
260-
"input": json.dumps({
261-
"datasetID": datasetID,
262-
"elementName": elementName,
263-
"elementContent": base64.b64encode(elementContent).decode("utf-8"),
264-
"elementDescription": elementDescription,
265-
}),
266-
"workspaceID": workspace_id,
267-
"datasetToolRepo": self.opts.DatasetToolRepo,
221+
"input": json.dumps({"workspaceID": os.getenv("GPTSCRIPT_WORKSPACE_ID")}),
222+
"datasetTool": self.opts.DatasetTool,
268223
"env": self.opts.Env
269224
}
270225
)
271-
return DatasetElementMeta.model_validate_json(res)
226+
return json.loads(res)
272227

273-
async def add_dataset_elements(self, workspace_id: str, datasetID: str, elements: List[DatasetElement]) -> str:
274-
if workspace_id == "":
275-
workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
276-
277-
if datasetID == "":
278-
raise ValueError("datasetID cannot be empty")
279-
elif not elements:
228+
async def add_dataset_elements(self, elements: List[DatasetElement], datasetID: str = "") -> str:
229+
if not elements:
280230
raise ValueError("elements cannot be empty")
281231

282232
res = await self._run_basic_command(
283233
"datasets/add-elements",
284234
{
285235
"input": json.dumps({
236+
"workspaceID": os.getenv("GPTSCRIPT_WORKSPACE_ID"),
286237
"datasetID": datasetID,
287238
"elements": [element.model_dump() for element in elements],
288239
}),
289-
"workspaceID": workspace_id,
290-
"datasetToolRepo": self.opts.DatasetToolRepo,
240+
"datasetTool": self.opts.DatasetTool,
291241
"env": self.opts.Env
292242
}
293243
)
294244
return res
295245

296-
297-
async def list_dataset_elements(self, workspace_id: str, datasetID: str) -> List[DatasetElementMeta]:
298-
if workspace_id == "":
299-
workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
300-
246+
async def list_dataset_elements(self, datasetID: str) -> List[DatasetElementMeta]:
301247
if datasetID == "":
302248
raise ValueError("datasetID cannot be empty")
303249

304250
res = await self._run_basic_command(
305251
"datasets/list-elements",
306252
{
307-
"input": json.dumps({"datasetID": datasetID}),
308-
"workspaceID": workspace_id,
309-
"datasetToolRepo": self.opts.DatasetToolRepo,
253+
"input": json.dumps({
254+
"workspaceID": os.getenv("GPTSCRIPT_WORKSPACE_ID"),
255+
"datasetID": datasetID,
256+
}),
257+
"datasetTool": self.opts.DatasetTool,
310258
"env": self.opts.Env
311259
}
312260
)
313261
return [DatasetElementMeta.model_validate(d) for d in json.loads(res)]
314262

315-
async def get_dataset_element(self, workspace_id: str, datasetID: str, elementName: str) -> DatasetElement:
316-
if workspace_id == "":
317-
workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
318-
263+
async def get_dataset_element(self, datasetID: str, elementName: str) -> DatasetElement:
319264
if datasetID == "":
320265
raise ValueError("datasetID cannot be empty")
321266
elif elementName == "":
@@ -324,9 +269,12 @@ async def get_dataset_element(self, workspace_id: str, datasetID: str, elementNa
324269
res = await self._run_basic_command(
325270
"datasets/get-element",
326271
{
327-
"input": json.dumps({"datasetID": datasetID, "element": elementName}),
328-
"workspaceID": workspace_id,
329-
"datasetToolRepo": self.opts.DatasetToolRepo,
272+
"input": json.dumps({
273+
"workspaceID": os.getenv("GPTSCRIPT_WORKSPACE_ID"),
274+
"datasetID": datasetID,
275+
"name": elementName,
276+
}),
277+
"datasetTool": self.opts.DatasetTool,
330278
"env": self.opts.Env,
331279
}
332280
)

gptscript/opts.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ def __init__(
1212
defaultModelProvider: str = "",
1313
defaultModel: str = "",
1414
cacheDir: str = "",
15-
datasetToolRepo: str = "",
15+
datasetTool: str = "",
1616
workspaceTool: str = "",
1717
env: list[str] = None,
1818
):
@@ -23,7 +23,7 @@ def __init__(
2323
self.DefaultModel = defaultModel
2424
self.DefaultModelProvider = defaultModelProvider
2525
self.CacheDir = cacheDir
26-
self.DatasetToolRepo = datasetToolRepo
26+
self.DatasetTool = datasetTool
2727
self.WorkspaceTool = workspaceTool
2828
if env is None:
2929
env = [f"{k}={v}" for k, v in os.environ.items()]
@@ -42,7 +42,7 @@ def merge(self, other: Self) -> Self:
4242
cp.DefaultModel = other.DefaultModel if other.DefaultModel != "" else self.DefaultModel
4343
cp.DefaultModelProvider = other.DefaultModelProvider if other.DefaultModelProvider != "" else self.DefaultModelProvider
4444
cp.CacheDir = other.CacheDir if other.CacheDir != "" else self.CacheDir
45-
cp.DatasetToolRepo = other.DatasetToolRepo if other.DatasetToolRepo != "" else self.DatasetToolRepo
45+
cp.DatasetTool = other.DatasetTool if other.DatasetTool != "" else self.DatasetTool
4646
cp.WorkspaceTool = other.WorkspaceTool if other.WorkspaceTool != "" else self.WorkspaceTool
4747
cp.Env = (other.Env or [])
4848
cp.Env.extend(self.Env or [])

tests/test_gptscript.py

Lines changed: 22 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -761,57 +761,50 @@ async def test_credentials(gptscript):
761761
@pytest.mark.asyncio
762762
async def test_datasets(gptscript):
763763
workspace_id = await gptscript.create_workspace("directory")
764-
dataset_name = str(os.urandom(8).hex())
764+
os.environ["GPTSCRIPT_WORKSPACE_ID"] = workspace_id
765765

766766
# Create dataset
767-
dataset = await gptscript.create_dataset(workspace_id, dataset_name, "this is a test dataset")
768-
assert dataset.id != "", "Expected dataset id to be set"
769-
assert dataset.name == dataset_name, "Expected dataset name to match"
770-
assert dataset.description == "this is a test dataset", "Expected dataset description to match"
771-
assert len(dataset.elements) == 0, "Expected dataset elements to be empty"
772-
773-
# Add an element
774-
element_meta = await gptscript.add_dataset_element(workspace_id, dataset.id, "element1", b"element1 contents",
775-
"element1 description")
776-
assert element_meta.name == "element1", "Expected element name to match"
777-
assert element_meta.description == "element1 description", "Expected element description to match"
767+
dataset_id = await gptscript.add_dataset_elements([
768+
DatasetElement(name="element1", contents="element1 contents", description="element1 description"),
769+
DatasetElement(name="element2", binaryContents=b"element2 contents", description="element2 description"),
770+
])
778771

779772
# Add two more elements
780-
await gptscript.add_dataset_elements(workspace_id, dataset.id, [
781-
DatasetElement(name="element2", contents=b"element2 contents", description="element2 description"),
782-
DatasetElement(name="element3", contents=b"element3 contents", description="element3 description"),
783-
])
773+
await gptscript.add_dataset_elements([
774+
DatasetElement(name="element3", contents="element3 contents", description="element3 description"),
775+
DatasetElement(name="element4", contents="element3 contents", description="element4 description"),
776+
], dataset_id)
784777

785778
# Get the elements
786-
e1 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element1")
779+
e1 = await gptscript.get_dataset_element(dataset_id, "element1")
787780
assert e1.name == "element1", "Expected element name to match"
788-
assert e1.contents == b"element1 contents", "Expected element contents to match"
781+
assert e1.contents == "element1 contents", "Expected element contents to match"
789782
assert e1.description == "element1 description", "Expected element description to match"
790-
e2 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element2")
783+
e2 = await gptscript.get_dataset_element(dataset_id, "element2")
791784
assert e2.name == "element2", "Expected element name to match"
792-
assert e2.contents == b"element2 contents", "Expected element contents to match"
785+
assert e2.binaryContents == b"element2 contents", "Expected element contents to match"
793786
assert e2.description == "element2 description", "Expected element description to match"
794-
e3 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element3")
787+
e3 = await gptscript.get_dataset_element(dataset_id, "element3")
795788
assert e3.name == "element3", "Expected element name to match"
796-
assert e3.contents == b"element3 contents", "Expected element contents to match"
789+
assert e3.contents == "element3 contents", "Expected element contents to match"
797790
assert e3.description == "element3 description", "Expected element description to match"
798791

799792
# List elements in the dataset
800-
elements = await gptscript.list_dataset_elements(workspace_id, dataset.id)
801-
assert len(elements) == 3, "Expected one element in the dataset"
793+
elements = await gptscript.list_dataset_elements(dataset_id)
794+
assert len(elements) == 4, "Expected four elements in the dataset"
802795
assert elements[0].name == "element1", "Expected element name to match"
803796
assert elements[0].description == "element1 description", "Expected element description to match"
804797
assert elements[1].name == "element2", "Expected element name to match"
805798
assert elements[1].description == "element2 description", "Expected element description to match"
806799
assert elements[2].name == "element3", "Expected element name to match"
807800
assert elements[2].description == "element3 description", "Expected element description to match"
801+
assert elements[3].name == "element4", "Expected element name to match"
802+
assert elements[3].description == "element4 description", "Expected element description to match"
808803

809804
# List datasets
810-
datasets = await gptscript.list_datasets(workspace_id)
811-
assert len(datasets) > 0, "Expected at least one dataset"
812-
assert datasets[0].id == dataset.id, "Expected dataset id to match"
813-
assert datasets[0].name == dataset_name, "Expected dataset name to match"
814-
assert datasets[0].description == "this is a test dataset", "Expected dataset description to match"
805+
dataset_ids = await gptscript.list_datasets()
806+
assert len(dataset_ids) > 0, "Expected at least one dataset"
807+
assert dataset_ids[0] == dataset_id, "Expected dataset id to match"
815808

816809
await gptscript.delete_workspace(workspace_id)
817810

0 commit comments

Comments
 (0)