Skip to content

Commit 65e5469

Browse files
Panaetius, Ralf Grubenmann
authored and
Ralf Grubenmann
committed
fix: Fixes JSON-LD translation and related issues (#846)
* Add default value converter and remove dataset property from DatasetFile
* Removes type from Person context, fixes missing context in jsonld translation of project, fixes url encoding of dataset names
1 parent 255a01d commit 65e5469

File tree

8 files changed

+34
-13
lines changed

8 files changed

+34
-13
lines changed

renku/core/commands/checks/migration.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,8 @@ def fix_dataset_files_urls(client):
240240
"""Ensure dataset files have correct url format."""
241241
for dataset in client.datasets.values():
242242
for file_ in dataset.files:
243-
file_.url = url_to_string(file_.url)
243+
if file_.url:
244+
file_.url = url_to_string(file_.url)
244245

245246
dataset.to_yaml()
246247

renku/core/commands/providers/dataverse.py

-1
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,6 @@ def as_dataset(self, client):
220220
filename=file_.name,
221221
filesize=file_.content_size,
222222
filetype=file_.file_format,
223-
dataset=dataset.name,
224223
path='',
225224
)
226225
serialized_files.append(dataset_file)

renku/core/commands/providers/zenodo.py

-1
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,6 @@ def as_dataset(self, client):
245245
filename=file_.filename,
246246
filesize=file_.filesize,
247247
filetype=file_.type,
248-
dataset=dataset.name,
249248
path='',
250249
)
251250
serialized_files.append(dataset_file)

renku/core/management/datasets.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -304,7 +304,6 @@ def _add_from_local(self, dataset, path, link, destination):
304304
'path': path_in_repo,
305305
'url': path_in_repo,
306306
'creator': dataset.creator,
307-
'dataset': dataset.name,
308307
'parent': self
309308
}]
310309

@@ -326,7 +325,6 @@ def _add_from_local(self, dataset, path, link, destination):
326325
'path': destination.relative_to(self.path),
327326
'url': 'file://' + os.path.relpath(str(src), str(self.path)),
328327
'creator': dataset.creator,
329-
'dataset': dataset.name,
330328
'parent': self
331329
}]
332330

@@ -352,7 +350,6 @@ def _add_from_url(self, dataset, url, destination):
352350
'path': destination.relative_to(self.path),
353351
'url': remove_credentials(url),
354352
'creator': dataset.creator,
355-
'dataset': dataset.name,
356353
'parent': self
357354
}]
358355

@@ -361,6 +358,7 @@ def _add_from_git(self, dataset, url, sources, destination, ref):
361358
from renku import LocalClient
362359

363360
u = parse.urlparse(url)
361+
364362
sources = self._resolve_paths(u.path, sources)
365363

366364
# Get all files from repo that match sources
@@ -429,7 +427,6 @@ def _add_from_git(self, dataset, url, sources, destination, ref):
429427
'path': path_in_dst_repo,
430428
'url': remove_credentials(url),
431429
'creator': creators,
432-
'dataset': dataset.name,
433430
'parent': self,
434431
'based_on': based_on
435432
})

renku/core/models/datasets.py

-2
Original file line number | Diff line number | Diff line change
@@ -172,8 +172,6 @@ class DatasetFile(Entity, CreatorMixin):
172172

173173
checksum = attr.ib(default=None, kw_only=True)
174174

175-
dataset = jsonld.ib(context='schema:isPartOf', default=None, kw_only=True)
176-
177175
filename = attr.ib(kw_only=True, converter=convert_filename_path)
178176

179177
name = jsonld.ib(context='schema:name', kw_only=True, default=None)

renku/core/models/jsonld.py

+21
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
import weakref
2323
from copy import deepcopy
2424
from datetime import datetime, timezone
25+
from functools import partial
2526
from importlib import import_module
2627
from pathlib import Path
2728

@@ -265,12 +266,23 @@ def _propagate_reference_contexts(
265266
return current_context, scoped_properties
266267

267268

269+
def _default_converter(cls, value):
270+
"""A default converter method that tries to deserialize objects."""
271+
if isinstance(value, dict):
272+
return cls.from_jsonld(value)
273+
274+
return value
275+
276+
268277
def attrib(context=None, type=None, **kwargs):
269278
"""Create a new attribute with context."""
270279
kwargs.setdefault('metadata', {})
271280
kwargs['metadata'][KEY] = context
272281
if type:
273282
kwargs['metadata'][KEY_CLS] = type
283+
284+
if 'converter' not in kwargs and hasattr(type, 'from_jsonld'):
285+
kwargs['converter'] = partial(_default_converter, type)
274286
return attr.ib(**kwargs)
275287

276288

@@ -523,6 +535,15 @@ def from_jsonld(
523535

524536
for k, v in compacted.items():
525537
if k in fields:
538+
no_value_context = isinstance(v, dict) and '@context' not in v
539+
has_nested_context = (
540+
k in compacted['@context'] and
541+
'@context' in compacted['@context'][k]
542+
)
543+
if no_value_context and has_nested_context:
544+
# Propagate down context
545+
v['@context'] = compacted['@context'][k]['@context']
546+
526547
data_[k.lstrip('_')] = v
527548

528549
if __reference__:

renku/core/models/migrations/dataset.py

+8 -3
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,12 @@ def migrate_dataset_schema(data):
3030
)
3131

3232
data['creator'] = data.pop('authors', {})
33-
for file_name, file_ in data.get('files', {}).items():
33+
34+
files = data.get('files', [])
35+
36+
if isinstance(files, dict):
37+
files = files.values()
38+
for file_ in files:
3439
file_['creator'] = file_.pop('authors', {})
3540

3641
return data
@@ -52,13 +57,13 @@ def migrate_absolute_paths(data):
5257
files = data.get('files', [])
5358

5459
if isinstance(files, dict):
55-
files = files.values()
60+
files = list(files.values())
5661

5762
for file_ in files:
5863
path = Path(file_.get('path'), '.')
5964
if path.is_absolute():
6065
file_['path'] = path.relative_to((os.getcwd()))
61-
66+
data['files'] = files
6267
return data
6368

6469

renku/core/models/projects.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,6 @@ class Project(object):
7373
kw_only=True,
7474
context={
7575
'@id': 'schema:creator',
76-
'@type': 'schema:Person',
7776
},
7877
type=Person
7978
)
@@ -120,6 +119,8 @@ def project_id(self):
120119
owner = remote.get('owner') or owner
121120
name = remote.get('name') or name
122121
host = os.environ.get('RENKU_DOMAIN') or host
122+
if name:
123+
name = urllib.parse.quote(name, safe='')
123124
project_url = urllib.parse.urljoin(
124125
'https://{host}'.format(host=host),
125126
pathlib.posixpath.join(PROJECT_URL_PATH, owner, name or 'NULL')

0 commit comments

Comments
 (0)