Skip to content

Commit 7e7752a

Browse files
committed
add option to remap data entity names to the original ones
1 parent 64397c7 commit 7e7752a

File tree

3 files changed

+70
-7
lines changed

3 files changed

+70
-7
lines changed

src/runcrate/cli.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,20 @@ def cli():
5656
type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path),
5757
help="path to a README file (should be README.md in Markdown format)",
5858
)
59-
def convert(root, output, license, workflow_name, readme):
59+
@click.option(
60+
"--remap-names",
61+
help="remap file/dir names to the original ones (MAY LEAD TO CLASHES!)",
62+
is_flag=True
63+
)
64+
def convert(root, output, license, workflow_name, readme, remap_names):
6065
"""\
6166
Convert a CWLProv RO bundle into a Workflow Run RO-Crate.
6267
6368
RO_DIR: top-level directory of the CWLProv RO
6469
"""
6570
if not output:
6671
output = Path(f"{root.name}.crate.zip")
67-
builder = ProvCrateBuilder(root, workflow_name, license, readme)
72+
builder = ProvCrateBuilder(root, workflow_name, license, readme, remap_names=remap_names)
6873
crate = builder.build()
6974
if output.suffix == ".zip":
7075
crate.write_zip(output)

src/runcrate/convert.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,8 @@ def get_workflow(wf_path):
192192

193193
class ProvCrateBuilder:
194194

195-
def __init__(self, root, workflow_name=None, license=None, readme=None):
195+
def __init__(self, root, workflow_name=None, license=None, readme=None,
196+
remap_names=False):
196197
self.root = Path(root)
197198
self.workflow_name = workflow_name
198199
self.license = license
@@ -213,6 +214,7 @@ def __init__(self, root, workflow_name=None, license=None, readme=None):
213214
# map source files to destination files
214215
self.file_map = {}
215216
self.manifest = self._get_manifest()
217+
self.remap_names = remap_names
216218

217219
@staticmethod
218220
def _get_step_maps(cwl_defs):
@@ -583,14 +585,19 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
583585
return action_p
584586
if "wf4ever:File" in type_names:
585587
hash_ = self.hashes[prov_param.id.localpart]
586-
dest = Path(parent.id if parent else "") / hash_
588+
if self.remap_names:
589+
basename = getattr(prov_param, "basename", hash_)
590+
else:
591+
basename = hash_
592+
dest = Path(parent.id if parent else "") / basename
587593
action_p = crate.dereference(dest.as_posix())
588594
if not action_p:
589595
source = self.manifest[hash_]
590596
action_p = crate.add_file(source, dest, properties={
591597
"sha1": hash_,
592598
})
593-
self._set_alternate_name(prov_param, action_p, parent=parent)
599+
if not self.remap_names:
600+
self._set_alternate_name(prov_param, action_p, parent=parent)
594601
try:
595602
source_k = str(source.resolve(strict=False))
596603
except RuntimeError:
@@ -599,11 +606,16 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
599606
return action_p
600607
if "ro:Folder" in type_names:
601608
hash_ = self.hashes[prov_param.id.localpart]
602-
dest = Path(parent.id if parent else "") / hash_
609+
if self.remap_names:
610+
basename = getattr(prov_param, "basename", hash_)
611+
else:
612+
basename = hash_
613+
dest = Path(parent.id if parent else "") / basename
603614
action_p = crate.dereference(dest.as_posix())
604615
if not action_p:
605616
action_p = crate.add_directory(dest_path=dest)
606-
self._set_alternate_name(prov_param, action_p, parent=parent)
617+
if not self.remap_names:
618+
self._set_alternate_name(prov_param, action_p, parent=parent)
607619
for child in self.get_dict(prov_param).values():
608620
part = self.convert_param(child, crate, parent=action_p)
609621
action_p.append_to("hasPart", part)

tests/test_cwlprov_crate_builder.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,3 +1153,49 @@ def test_revsort_inline(data_dir, tmpdir, cwl_version):
11531153
(reverse_sort_param.id, reverse_param.id),
11541154
}
11551155
assert set(_connected(workflow)) == {(sort_out_param.id, out_file_param.id)}
1156+
1157+
1158+
def test_remap_names(data_dir, tmpdir):
    """Check crate entity ids when the builder is run with ``remap_names=True``.

    Converts the ``grepucase-run-1`` bundle, writes the crate to disk, reads
    it back and verifies that the input/output directories and the files
    they contain are identified by names such as ``grepucase_in/foo``.
    """
    root = data_dir / "grepucase-run-1"
    output = tmpdir / "grepucase-run-1-crate"
    license = "Apache-2.0"
    crate = ProvCrateBuilder(root, license=license, remap_names=True).build()
    crate.write(output)

    # Re-read the crate from disk so the checks run on the serialized form.
    crate = ROCrate(output)
    workflow = crate.mainEntity

    # Index every CreateAction by the id of the tool/workflow it ran.
    action_map = {}
    for entity in crate.contextual_entities:
        if "CreateAction" in entity.type:
            action_map[entity["instrument"].id] = entity
    assert len(action_map) == 3

    # --- top-level workflow action ---
    wf_action = action_map["packed.cwl"]
    assert wf_action["instrument"] is workflow
    wf_objects = wf_action["object"]
    wf_results = wf_action["result"]
    assert len(wf_objects) == 2
    assert len(wf_results) == 1

    wf_objects_by_id = {}
    for obj in wf_objects:
        wf_objects_by_id[obj.id] = obj
    wf_input_dir = wf_objects_by_id.get("grepucase_in/")
    assert wf_input_dir

    wf_output_dir = wf_results[0]
    assert wf_output_dir.id == "ucase_out/"

    input_part_ids = {part.id for part in wf_input_dir["hasPart"]}
    assert input_part_ids == {
        "grepucase_in/bar", "grepucase_in/foo"
    }
    output_part_ids = {part.id for part in wf_output_dir["hasPart"]}
    assert output_part_ids == {
        "ucase_out/bar.out/", "ucase_out/foo.out/"
    }

    # Each output sub-directory holds one file named after the directory.
    for subdir in wf_output_dir["hasPart"]:
        if subdir.id == "ucase_out/bar.out/":
            expected_id = "ucase_out/bar.out/bar.out.out"
        else:
            expected_id = "ucase_out/foo.out/foo.out.out"
        assert subdir["hasPart"][0].id == expected_id

    # --- grep tool step action ---
    greptool_action = action_map["packed.cwl#greptool.cwl"]
    greptool_objects = greptool_action["object"]
    greptool_results = greptool_action["result"]
    assert len(greptool_objects) == 2
    assert len(greptool_results) == 1

    greptool_objects_by_id = {}
    for obj in greptool_objects:
        greptool_objects_by_id[obj.id] = obj
    greptool_input_dir = greptool_objects_by_id.get("grepucase_in/")
    # The step must reference the very same entity as the workflow input.
    assert greptool_input_dir is wf_input_dir

    greptool_output_dir = greptool_results[0]
    assert greptool_output_dir.id == "grep_out/"

0 commit comments

Comments
 (0)