Skip to content

Commit fc20b61

Browse files
committed
names remapping: avoid clashes by using different dirs
1 parent 57e7958 commit fc20b61

File tree

4 files changed

+64
-31
lines changed

4 files changed

+64
-31
lines changed

src/runcrate/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def cli():
5858
)
5959
@click.option(
6060
"--remap-names",
61-
help="remap file/dir names to the original ones (MAY LEAD TO CLASHES!)",
61+
help="remap file/dir names to the original ones",
6262
is_flag=True
6363
)
6464
def convert(root, output, license, workflow_name, readme, remap_names):

src/runcrate/convert.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
"null": None,
5656
}
5757

58-
SCATTER_JOB_PATTERN = re.compile(r"^(.+)_\d+$")
58+
SCATTER_JOB_PATTERN = re.compile(r"^(.+)_(\d+)$")
5959

6060
CWLPROV_NONE = "https://w3id.org/cwl/prov#None"
6161

@@ -215,6 +215,7 @@ def __init__(self, root, workflow_name=None, license=None, readme=None,
215215
self.file_map = {}
216216
self.manifest = self._get_manifest()
217217
self.remap_names = remap_names
218+
self.data_root = "data"
218219

219220
@staticmethod
220221
def _get_step_maps(cwl_defs):
@@ -240,11 +241,13 @@ def _get_manifest(self):
240241
def _resolve_plan(self, activity):
241242
job_qname = activity.plan()
242243
plan = activity.provenance.entity(job_qname)
244+
scatter_id = None
243245
if not plan:
244246
m = SCATTER_JOB_PATTERN.match(str(job_qname))
245247
if m:
246248
plan = activity.provenance.entity(m.groups()[0])
247-
return plan
249+
scatter_id = m.groups()[1]
250+
return plan, scatter_id
248251

249252
def _get_hash(self, prov_param):
250253
k = prov_param.id.localpart
@@ -463,9 +466,11 @@ def add_action(self, crate, activity, parent_instrument=None):
463466
"@type": "CreateAction",
464467
"name": activity.label,
465468
}))
466-
plan = self._resolve_plan(activity)
469+
plan, scatter_id = self._resolve_plan(activity)
467470
plan_tag = plan.id.localpart
471+
dest_base = Path(self.data_root)
468472
if plan_tag == "main":
473+
dest_base = dest_base / "main"
469474
assert str(activity.type) == "wfprov:WorkflowRun"
470475
instrument = workflow
471476
self.roc_engine_run["result"] = action
@@ -480,6 +485,7 @@ def to_wf_p(k):
480485
if parts[0] == "main":
481486
parts[0] = parent_instrument_fragment
482487
plan_tag = "/".join(parts)
488+
dest_base = dest_base / (f"{plan_tag}_{scatter_id}" if scatter_id else f"{plan_tag}")
483489
tool_name = self.step_maps[parent_instrument_fragment][plan_tag]["tool"]
484490
instrument = crate.dereference(f"{workflow.id}#{tool_name}")
485491
control_action = self.control_actions.get(plan_tag)
@@ -503,12 +509,14 @@ def to_wf_p(k):
503509
action["instrument"] = instrument
504510
action["startTime"] = activity.start().time.isoformat()
505511
action["endTime"] = activity.end().time.isoformat()
506-
action["object"] = self.add_action_params(crate, activity, to_wf_p, "usage")
507-
action["result"] = self.add_action_params(crate, activity, to_wf_p, "generation")
512+
action["object"] = self.add_action_params(crate, activity, to_wf_p, "usage",
513+
dest_base / "in" if self.remap_names else "")
514+
action["result"] = self.add_action_params(crate, activity, to_wf_p, "generation",
515+
dest_base / "out" if self.remap_names else "")
508516
for job in activity.steps():
509517
self.add_action(crate, job, parent_instrument=instrument)
510518

511-
def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
519+
def add_action_params(self, crate, activity, to_wf_p, ptype="usage", dest_base=""):
512520
action_params = []
513521
all_roles = set()
514522
for rel in getattr(activity, ptype)():
@@ -528,7 +536,7 @@ def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
528536
wf_p = crate.dereference(to_wf_p(k))
529537
k = get_fragment(k)
530538
v = rel.entity()
531-
value = self.convert_param(v, crate)
539+
value = self.convert_param(v, crate, dest_base=dest_base)
532540
if value is None:
533541
continue # param is optional with no default and was not set
534542
if {"ro:Folder", "wf4ever:File"} & set(str(_) for _ in v.types()):
@@ -565,7 +573,7 @@ def _set_alternate_name(prov_param, action_p, parent=None):
565573
if "alternateName" in parent:
566574
action_p["alternateName"] = (Path(parent["alternateName"]) / basename).as_posix()
567575

568-
def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
576+
def convert_param(self, prov_param, crate, convert_secondary=True, parent=None, dest_base=""):
569577
type_names = frozenset(str(_) for _ in prov_param.types())
570578
secondary_files = [_.generated_entity() for _ in prov_param.derivations()
571579
if str(_.type) == "cwlprov:SecondaryFile"]
@@ -589,7 +597,7 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
589597
basename = getattr(prov_param, "basename", hash_)
590598
else:
591599
basename = hash_
592-
dest = Path(parent.id if parent else "") / basename
600+
dest = Path(parent.id if parent else dest_base) / basename
593601
action_p = crate.dereference(dest.as_posix())
594602
if not action_p:
595603
source = self.manifest[hash_]
@@ -610,7 +618,7 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
610618
basename = getattr(prov_param, "basename", hash_)
611619
else:
612620
basename = hash_
613-
dest = Path(parent.id if parent else "") / basename
621+
dest = Path(parent.id if parent else dest_base) / basename
614622
action_p = crate.dereference(dest.as_posix())
615623
if not action_p:
616624
action_p = crate.add_directory(dest_path=dest)

tests/test_cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ def test_cli_convert_remap_names(data_dir, tmpdir):
101101
args = ["convert", str(root), "-o", str(crate_dir), "--remap-names"]
102102
assert runner.invoke(cli, args).exit_code == 0
103103
crate = ROCrate(crate_dir)
104-
assert crate.get("grepucase_in/")
105-
assert (crate_dir / "grepucase_in").is_dir()
104+
assert crate.get("data/main/in/grepucase_in/")
105+
assert (crate_dir / "data" / "main" / "in" / "grepucase_in").is_dir()
106106

107107

108108
def test_cli_report_provenance_minimal(data_dir, caplog):

tests/test_cwlprov_crate_builder.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,48 +1174,73 @@ def test_remap_names(data_dir, tmpdir):
11741174
assert len(wf_objects) == 2
11751175
assert len(wf_results) == 1
11761176
wf_objects_map = {_.id: _ for _ in wf_objects}
1177-
wf_input_dir = wf_objects_map.get("grepucase_in/")
1177+
wf_input_dir = wf_objects_map.get("data/main/in/grepucase_in/")
11781178
assert wf_input_dir
11791179
wf_output_dir = wf_results[0]
1180-
assert wf_output_dir.id == "ucase_out/"
1180+
assert wf_output_dir.id == "data/main/out/ucase_out/"
11811181
assert set(_.id for _ in wf_input_dir["hasPart"]) == {
1182-
"grepucase_in/bar", "grepucase_in/foo"
1182+
"data/main/in/grepucase_in/bar", "data/main/in/grepucase_in/foo"
11831183
}
11841184
assert set(_.id for _ in wf_output_dir["hasPart"]) == {
1185-
"ucase_out/bar.out/", "ucase_out/foo.out/"
1185+
"data/main/out/ucase_out/bar.out/", "data/main/out/ucase_out/foo.out/"
11861186
}
11871187
for d in wf_output_dir["hasPart"]:
1188-
if d.id == "ucase_out/bar.out/":
1189-
assert d["hasPart"][0].id == "ucase_out/bar.out/bar.out.out"
1188+
if d.id == "data/main/out/ucase_out/bar.out/":
1189+
assert d["hasPart"][0].id == "data/main/out/ucase_out/bar.out/bar.out.out"
11901190
else:
1191-
assert d["hasPart"][0].id == "ucase_out/foo.out/foo.out.out"
1191+
assert d["hasPart"][0].id == "data/main/out/ucase_out/foo.out/foo.out.out"
11921192
greptool_action = action_map["packed.cwl#greptool.cwl"]
11931193
greptool_objects = greptool_action["object"]
11941194
greptool_results = greptool_action["result"]
11951195
assert len(greptool_objects) == 2
11961196
assert len(greptool_results) == 1
11971197
greptool_objects_map = {_.id: _ for _ in greptool_objects}
1198-
greptool_input_dir = greptool_objects_map.get("grepucase_in/")
1199-
assert greptool_input_dir is wf_input_dir
1198+
greptool_input_dir = greptool_objects_map.get("data/main/grep/in/grepucase_in/")
1199+
assert greptool_input_dir
1200+
assert set(_.id for _ in greptool_input_dir["hasPart"]) == {
1201+
"data/main/grep/in/grepucase_in/bar", "data/main/grep/in/grepucase_in/foo"
1202+
}
12001203
greptool_output_dir = greptool_results[0]
1201-
assert greptool_output_dir.id == "grep_out/"
1204+
assert greptool_output_dir.id == "data/main/grep/out/grep_out/"
1205+
assert set(_.id for _ in greptool_output_dir["hasPart"]) == {
1206+
"data/main/grep/out/grep_out/bar.out", "data/main/grep/out/grep_out/foo.out"
1207+
}
12021208
ucasetool_action = action_map["packed.cwl#ucasetool.cwl"]
12031209
ucasetool_objects = ucasetool_action["object"]
12041210
ucasetool_results = ucasetool_action["result"]
12051211
assert len(ucasetool_objects) == 1
12061212
assert len(ucasetool_results) == 1
12071213
ucasetool_input_dir = ucasetool_objects[0]
1208-
assert ucasetool_input_dir is greptool_output_dir
1214+
assert ucasetool_input_dir.id == "data/main/ucase/in/grep_out/"
1215+
assert set(_.id for _ in ucasetool_input_dir["hasPart"]) == {
1216+
"data/main/ucase/in/grep_out/bar.out", "data/main/ucase/in/grep_out/foo.out"
1217+
}
12091218
ucasetool_output_dir = ucasetool_results[0]
1210-
assert ucasetool_output_dir is wf_output_dir
1219+
assert ucasetool_output_dir.id == "data/main/ucase/out/ucase_out/"
1220+
assert set(_.id for _ in ucasetool_output_dir["hasPart"]) == {
1221+
"data/main/ucase/out/ucase_out/bar.out/", "data/main/ucase/out/ucase_out/foo.out/"
1222+
}
1223+
1224+
for d in ucasetool_output_dir["hasPart"]:
1225+
if d.id == "data/main/ucase/out/ucase_out/bar.out/":
1226+
assert d["hasPart"][0].id == "data/main/ucase/out/ucase_out/bar.out/bar.out.out"
1227+
else:
1228+
assert d["hasPart"][0].id == "data/main/ucase/out/ucase_out/foo.out/foo.out.out"
1229+
12111230
for e in crate.data_entities:
12121231
assert "alternateName" not in e
12131232
for p in (
1214-
"grepucase_in/bar",
1215-
"grepucase_in/foo",
1216-
"grep_out/bar.out",
1217-
"grep_out/foo.out",
1218-
"ucase_out/bar.out/bar.out.out",
1219-
"ucase_out/foo.out/foo.out.out",
1233+
"data/main/in/grepucase_in/bar",
1234+
"data/main/in/grepucase_in/foo",
1235+
"data/main/out/ucase_out/bar.out/bar.out.out",
1236+
"data/main/out/ucase_out/foo.out/foo.out.out",
1237+
"data/main/grep/in/grepucase_in/bar",
1238+
"data/main/grep/in/grepucase_in/foo",
1239+
"data/main/grep/out/grep_out/bar.out",
1240+
"data/main/grep/out/grep_out/foo.out",
1241+
"data/main/ucase/in/grep_out/bar.out",
1242+
"data/main/ucase/in/grep_out/foo.out",
1243+
"data/main/ucase/out/ucase_out/bar.out/bar.out.out",
1244+
"data/main/ucase/out/ucase_out/foo.out/foo.out.out",
12201245
):
12211246
assert (output / p).is_file()

0 commit comments

Comments
 (0)