From 472fcc71aa472f57f3aca6724f14e3c81b097249 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sun, 26 Jan 2025 16:34:54 +0100 Subject: [PATCH 01/72] Addressable data store Signed-off-by: Paolo Di Tommaso --- .../main/groovy/nextflow/cli/CmdCid.groovy | 78 +++++++++++++ .../main/groovy/nextflow/cli/Launcher.groovy | 3 +- .../nextflow/data/cid/CidObserver.groovy | 97 ++++++++++++++++ .../groovy/nextflow/data/cid/CidStore.groovy | 39 +++++++ .../nextflow/data/cid/DefaultCidStore.groovy | 67 +++++++++++ .../nextflow/data/cid/model/TaskOutput.groovy | 35 ++++++ .../nextflow/data/cid/model/TaskRun.groovy | 34 ++++++ .../nextflow/data/config/DataConfig.groovy | 53 +++++++++ .../nextflow/data/config/DataStoreOpts.groovy | 39 +++++++ .../groovy/nextflow/processor/TaskId.groovy | 2 + .../trace/DefaultObserverFactory.groovy | 6 + .../nextflow/data/cid/CidObserverTest.groovy | 104 ++++++++++++++++++ .../data/config/DataConfigTest.groovy | 42 +++++++ .../main/nextflow/extension/FilesEx.groovy | 1 + 14 files changed, 599 insertions(+), 1 deletion(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy diff --git 
a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy new file mode 100644 index 0000000000..d6d242fd6d --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -0,0 +1,78 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.cli + +import com.beust.jcommander.Parameter +import groovy.transform.CompileStatic +import nextflow.exception.AbortOperationException +import nextflow.plugin.Plugins + +/** + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class CmdCid extends CmdBase { + + private static final String NAME = 'cid' + + interface SubCmd { + String getName() + void apply(List result) + void usage(List result) + } + + private List commands = new ArrayList<>() + + CmdCid() { + + } + + @Parameter(hidden = true) + List args + + @Override + String getName() { + return NAME + } + + @Override + void run() { + if( !args ) { + return + } + // setup the plugins system and load the secrets provider + Plugins.init() + + getCmd(args).apply(args.drop(1)) + } + + protected SubCmd getCmd(List args) { + + def cmd = commands.find { it.name == args[0] } + if( cmd ) { + return cmd + } + + def matches = commands.collect{ it.name }.closest(args[0]) + def msg = "Unknown cloud sub-command: ${args[0]}" + if( matches ) + msg += " -- Did you mean one of these?\n" + matches.collect { " $it"}.join('\n') + throw new 
AbortOperationException(msg) + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy index ec60408ecb..3ecc9785c5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy @@ -106,7 +106,8 @@ class Launcher { new CmdHelp(), new CmdSelfUpdate(), new CmdPlugin(), - new CmdInspect() + new CmdInspect(), + new CmdCid() ] if(SecretsLoader.isEnabled()) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy new file mode 100644 index 0000000000..1a97d0350c --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -0,0 +1,97 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid + +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.attribute.BasicFileAttributes + +import groovy.json.JsonOutput +import groovy.transform.CompileStatic +import nextflow.Session +import nextflow.data.cid.model.TaskOutput +import nextflow.data.config.DataConfig +import nextflow.processor.TaskHandler +import nextflow.processor.TaskRun +import nextflow.script.params.FileOutParam +import nextflow.trace.TraceObserver +import nextflow.trace.TraceRecord +/** + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class CidObserver implements TraceObserver { + + private CidStore store + + @Override + void onFlowCreate(Session session) { + store = new DefaultCidStore() + store.open(DataConfig.create(session)) + } + + @Override + void onProcessComplete(TaskHandler handler, TraceRecord trace) { + storeTaskInfo(handler.task) + } + + void storeTaskInfo(TaskRun task) { + // store the task run entry + storeTaskRun(task) + // store all task outputs files + final outputs = task.getOutputsByType(FileOutParam) + for( Map.Entry entry : outputs ) { + final value = entry.value + if( value instanceof Path ) { + storeTaskOutput(task, (Path)value) + } + else if( value instanceof Collection ) { + for( Path it : value ) + storeTaskOutput(task, (Path)it) + } + } + } + + protected void storeTaskRun(TaskRun task) { + final value = new nextflow.data.cid.model.TaskRun( + task.id.value, + task.getName(), + task.hash.toString() ) + // store in the underlying persistence + final key = "${value.hash}/.task" + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + } + + protected void storeTaskOutput(TaskRun task, Path path) { + final attrs = readAttributes(path) + final rel = task.workDir.relativize(path).toString() + final key = "${task.hash}/${rel}" + final value = new TaskOutput( + "cid://$key", + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis() ) + // store in the underlying 
persistence + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + } + + protected BasicFileAttributes readAttributes(Path path) { + Files.readAttributes(path, BasicFileAttributes) + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy new file mode 100644 index 0000000000..6591e67b0c --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy @@ -0,0 +1,39 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid + +import java.util.function.Consumer + +import groovy.transform.CompileStatic +import nextflow.data.config.DataConfig +/** + * + * @author Paolo Di Tommaso + */ +@CompileStatic +interface CidStore { + + void open(DataConfig config) + + void save(String key, Object value) + + void list(String key, Consumer consumer) + + Object load(String key) + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy new file mode 100644 index 0000000000..3058a441e6 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -0,0 +1,67 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid + +import java.nio.file.Files +import java.nio.file.Path +import java.util.function.Consumer + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.data.config.DataConfig +import nextflow.exception.AbortOperationException + +/** + * + * @author Paolo Di Tommaso + */ +@Slf4j +@CompileStatic +class DefaultCidStore implements CidStore { + + private Path location + + void open(DataConfig config) { + location = config.store.location + if( !Files.exists(location) && !Files.createDirectories(location) ) { + throw new AbortOperationException("Unable to create CID store directory: $location") + } + } + + @Override + void save(String key, Object value) { + final path = location.resolve(key) + Files.createDirectories(path.parent) + log.debug "Save CID file path: $path" + path.text = value + } + + @Override + void list(String key, Consumer consumer) { + for( Path it : Files.walk(location.resolve(key)) ) { + final fileKey = location.relativize(it).toString() + consumer.accept(fileKey) + } + } + + @Override + Object load(String key) { + location.resolve(key).text + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy new file mode 100644 index 0000000000..701734c751 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy @@ -0,0 +1,35 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * + * @author Paolo Di Tommaso + */ +@Canonical +@CompileStatic +class TaskOutput { + String uri + long size + long createdAt + long modifiedAt + List annotations +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy new file mode 100644 index 0000000000..9c5c0d9b91 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy @@ -0,0 +1,34 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * + * @author Paolo Di Tommaso + */ +@Canonical +@CompileStatic +class TaskRun { + int id + String name + String hash + List annotations +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy new file mode 100644 index 0000000000..7d188ef9a9 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy @@ -0,0 +1,53 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.config + +import groovy.transform.CompileStatic +import nextflow.Global +import nextflow.Session + +/** + * Model workflow data config + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class DataConfig { + + final DataStoreOpts store + + DataConfig(Map opts) { + this.store = new DataStoreOpts(opts.store as Map ?: Map.of()) + } + + static DataConfig create(Session session) { + if( session ) { + return new DataConfig(session.config.navigate('workflow.data') as Map ?: Map.of()) + } + else + throw new IllegalStateException("Missing Nextflow session") + } + + static DataConfig create() { + create(getSession()) + } + + static private Session getSession() { + return Global.session as Session + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy b/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy new file mode 100644 index 0000000000..f5873251ef --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy @@ -0,0 +1,39 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.config + +import java.nio.file.Path + +import groovy.transform.CompileStatic +/** + * Model data store options + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class DataStoreOpts { + + final Path location + + DataStoreOpts(Map opts) { + this.location = opts.location + ? 
Path.of(opts.location as String) + : Path.of('.').toAbsolutePath().normalize().resolve('data') + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy index 3576b9fde7..fbd4784a05 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy @@ -38,6 +38,8 @@ class TaskId extends Number implements Comparable, Serializable, Cloneable { private final int value + int getValue() { value } + static TaskId of( value ) { if( value instanceof Integer ) return new TaskId(value) diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index 6c391625c9..4782a2d2f7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -3,6 +3,7 @@ package nextflow.trace import java.nio.file.Path import nextflow.Session +import nextflow.data.cid.CidObserver /** * Creates Nextflow observes object @@ -25,9 +26,14 @@ class DefaultObserverFactory implements TraceObserverFactory { createTimelineObserver(result) createDagObserver(result) createAnsiLogObserver(result) + createCidObserver(result) return result } + protected void createCidObserver(Collection result) { + result.add( new CidObserver() ) + } + protected void createAnsiLogObserver(Collection result) { if( session.ansiLog ) { session.ansiLogObserver = new AnsiLogObserver() diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy new file mode 100644 index 0000000000..fe06bb0bb7 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -0,0 +1,104 @@ +/* + * Copyright 2013-2024, 
Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid + +import java.nio.file.Files +import java.nio.file.attribute.BasicFileAttributes +import java.nio.file.attribute.FileTime +import java.time.Instant + +import com.google.common.hash.HashCode +import nextflow.Session +import nextflow.processor.TaskId +import nextflow.processor.TaskRun +import spock.lang.Specification +/** + * + * @author Paolo Di Tommaso + */ +class CidObserverTest extends Specification { + + def 'should save task run' () { + given: + def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + def session = Mock(Session) { getConfig()>>config } + def observer = new CidObserver() + observer.onFlowCreate(session) + and: + def hash = HashCode.fromInt(123456789) + and: + def task = Mock(TaskRun) { + getId() >> TaskId.of(100) + getName() >> 'foo' + getHash() >> hash + } + when: + observer.storeTaskRun(task) + then: + folder.resolve(hash.toString()).text == '{"id":100,"name":"foo","hash":"15cd5b07","annotations":null}' + + cleanup: + folder?.deleteDir() + } + + def 'should save task output' () { + given: + def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + def session = Mock(Session) { getConfig()>>config } + def observer = Spy(new CidObserver()) + observer.onFlowCreate(session) + and: + def workDir = 
folder.resolve('12/34567890') + Files.createDirectories(workDir) + and: + def outFile = workDir.resolve('foo/bar/file.bam') + Files.createDirectories(outFile.parent) + outFile.text = 'some data' + and: + def hash = HashCode.fromInt(123456789) + and: + def task = Mock(TaskRun) { + getId() >> TaskId.of(100) + getName() >> 'foo' + getHash() >> hash + getWorkDir() >> workDir + } + and: + def ts1 = Instant.ofEpochMilli(1737914400) + def ts2 = Instant.ofEpochMilli(1737914500) + def attrs = Mock(BasicFileAttributes) { + size() >> 100 + creationTime() >> FileTime.from(ts1) + lastModifiedTime() >> FileTime.from(ts2) + } + and: + observer.readAttributes(outFile) >> attrs + + when: + observer.storeTaskOutput(task, outFile) + then: + folder.resolve("${hash}/foo/bar/file.bam").text + == '{"uri":"cid://15cd5b07/foo/bar/file.bam","size":100,"createdAt":1737914400,"modifiedAt":1737914500,"annotations":null}' + + cleanup: + folder?.deleteDir() + } + +} diff --git a/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy new file mode 100644 index 0000000000..3c7ac18489 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy @@ -0,0 +1,42 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.config + +import java.nio.file.Path + +import spock.lang.Specification +/** + * + * @author Paolo Di Tommaso + */ +class DataConfigTest extends Specification { + + def 'should create default config' () { + when: + def config = new DataConfig(Map.of()) + then: + config.store.location == Path.of('.').resolve('data').toAbsolutePath().normalize() + } + + def 'should create data config' () { + when: + def config = new DataConfig(store: [location: "/some/data/store"]) + then: + config.store.location == Path.of("/some/data/store") + } +} diff --git a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy index 255d52bdd3..6f8f6fb777 100644 --- a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy +++ b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy @@ -494,6 +494,7 @@ class FilesEx { return true } catch(IOException e) { + log.debug "Failed to create directory '$self'", e return false } } From b5e8c4645b3eeb71dceb2106333c18eca59f13ed Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Fri, 31 Jan 2025 12:17:33 +0100 Subject: [PATCH 02/72] Addressable data store [wip 2] [ci skip] Signed-off-by: Paolo Di Tommaso --- .../nextflow/data/cid/CidObserver.groovy | 11 ++++++-- .../nextflow/data/cid/DefaultCidStore.groovy | 2 +- .../nextflow/data/cid/model/DataType.groovy | 26 +++++++++++++++++++ .../nextflow/data/cid/model/TaskOutput.groovy | 2 ++ .../nextflow/data/cid/model/TaskRun.groovy | 1 + 5 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index 1a97d0350c..8934f6b04a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ 
b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -24,6 +24,7 @@ import java.nio.file.attribute.BasicFileAttributes import groovy.json.JsonOutput import groovy.transform.CompileStatic import nextflow.Session +import nextflow.data.cid.model.DataType import nextflow.data.cid.model.TaskOutput import nextflow.data.config.DataConfig import nextflow.processor.TaskHandler @@ -31,6 +32,8 @@ import nextflow.processor.TaskRun import nextflow.script.params.FileOutParam import nextflow.trace.TraceObserver import nextflow.trace.TraceRecord +import nextflow.util.CacheHelper + /** * * @author Paolo Di Tommaso @@ -70,20 +73,24 @@ class CidObserver implements TraceObserver { protected void storeTaskRun(TaskRun task) { final value = new nextflow.data.cid.model.TaskRun( + DataType.Task, task.id.value, task.getName(), task.hash.toString() ) // store in the underlying persistence - final key = "${value.hash}/.task" + final key = "${value.hash}/.data.json" store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) } protected void storeTaskOutput(TaskRun task, Path path) { final attrs = readAttributes(path) final rel = task.workDir.relativize(path).toString() - final key = "${task.hash}/${rel}" + final key = "${task.hash}/${rel}/.data.json" + final hash = CacheHelper.hasher(path).hash().toString() final value = new TaskOutput( + DataType.Output, "cid://$key", + hash, attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis() ) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy index 3058a441e6..ae6faaeceb 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -37,7 +37,7 @@ class DefaultCidStore implements CidStore { private Path location void open(DataConfig config) { - location = config.store.location + 
location = config.store.location.resolve('.meta') if( !Files.exists(location) && !Files.createDirectories(location) ) { throw new AbortOperationException("Unable to create CID store directory: $location") } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy new file mode 100644 index 0000000000..23cfc19d03 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy @@ -0,0 +1,26 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.model + +/** + * + * @author Paolo Di Tommaso + */ +enum DataType { + Task, Workflow, Output +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy index 701734c751..dd27ead4e0 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy @@ -27,7 +27,9 @@ import groovy.transform.CompileStatic @Canonical @CompileStatic class TaskOutput { + DataType type String uri + String hash long size long createdAt long modifiedAt diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy index 9c5c0d9b91..2b91df426c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy @@ -27,6 +27,7 @@ import groovy.transform.CompileStatic @Canonical @CompileStatic class TaskRun { + DataType type int id String name String hash From 669afd5d84aa7d738b1d05fb52c5541104bee972 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Fri, 31 Jan 2025 18:17:25 +0100 Subject: [PATCH 03/72] Minor changes [ci skip] Signed-off-by: Paolo Di Tommaso --- .../src/main/groovy/nextflow/data/cid/CidObserver.groovy | 7 +++++-- .../main/groovy/nextflow/data/cid/model/TaskOutput.groovy | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index 8934f6b04a..348c9ad1d5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -85,11 +85,14 @@ class CidObserver implements TraceObserver { protected void 
storeTaskOutput(TaskRun task, Path path) { final attrs = readAttributes(path) final rel = task.workDir.relativize(path).toString() - final key = "${task.hash}/${rel}/.data.json" + final cid = "${task.hash}/${rel}" + final uri = "cid://${cid}" + final key = "${cid}/.data.json" final hash = CacheHelper.hasher(path).hash().toString() final value = new TaskOutput( DataType.Output, - "cid://$key", + uri, + path.toUriString(), hash, attrs.size(), attrs.creationTime().toMillis(), diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy index dd27ead4e0..6467d36c6f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy @@ -29,6 +29,7 @@ import groovy.transform.CompileStatic class TaskOutput { DataType type String uri + String realPath String hash long size long createdAt From c93a713cbf1609601bb0d15018d83323a182a7e1 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sun, 26 Jan 2025 16:34:54 +0100 Subject: [PATCH 04/72] Addressable data store Signed-off-by: Paolo Di Tommaso --- .../main/groovy/nextflow/cli/CmdCid.groovy | 78 +++++++++++++ .../main/groovy/nextflow/cli/Launcher.groovy | 3 +- .../nextflow/data/cid/CidObserver.groovy | 97 ++++++++++++++++ .../groovy/nextflow/data/cid/CidStore.groovy | 39 +++++++ .../nextflow/data/cid/DefaultCidStore.groovy | 67 +++++++++++ .../nextflow/data/cid/model/TaskOutput.groovy | 35 ++++++ .../nextflow/data/cid/model/TaskRun.groovy | 34 ++++++ .../nextflow/data/config/DataConfig.groovy | 53 +++++++++ .../nextflow/data/config/DataStoreOpts.groovy | 39 +++++++ .../groovy/nextflow/processor/TaskId.groovy | 2 + .../trace/DefaultObserverFactory.groovy | 6 + .../nextflow/data/cid/CidObserverTest.groovy | 104 ++++++++++++++++++ .../data/config/DataConfigTest.groovy | 42 +++++++ .../main/nextflow/extension/FilesEx.groovy | 
1 + 14 files changed, 599 insertions(+), 1 deletion(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy new file mode 100644 index 0000000000..d6d242fd6d --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -0,0 +1,78 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.cli + +import com.beust.jcommander.Parameter +import groovy.transform.CompileStatic +import nextflow.exception.AbortOperationException +import nextflow.plugin.Plugins + +/** + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class CmdCid extends CmdBase { + + private static final String NAME = 'cid' + + interface SubCmd { + String getName() + void apply(List result) + void usage(List result) + } + + private List commands = new ArrayList<>() + + CmdCid() { + + } + + @Parameter(hidden = true) + List args + + @Override + String getName() { + return NAME + } + + @Override + void run() { + if( !args ) { + return + } + // setup the plugins system and load the secrets provider + Plugins.init() + + getCmd(args).apply(args.drop(1)) + } + + protected SubCmd getCmd(List args) { + + def cmd = commands.find { it.name == args[0] } + if( cmd ) { + return cmd + } + + def matches = commands.collect{ it.name }.closest(args[0]) + def msg = "Unknown cloud sub-command: ${args[0]}" + if( matches ) + msg += " -- Did you mean one of these?\n" + matches.collect { " $it"}.join('\n') + throw new AbortOperationException(msg) + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy index ec60408ecb..3ecc9785c5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy @@ -106,7 +106,8 @@ class Launcher { new CmdHelp(), new CmdSelfUpdate(), new CmdPlugin(), - new CmdInspect() + new CmdInspect(), + new CmdCid() ] if(SecretsLoader.isEnabled()) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy new file mode 100644 index 0000000000..1a97d0350c --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -0,0 +1,97 @@ +/* + * Copyright 2013-2024, Seqera Labs + * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid + +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.attribute.BasicFileAttributes + +import groovy.json.JsonOutput +import groovy.transform.CompileStatic +import nextflow.Session +import nextflow.data.cid.model.TaskOutput +import nextflow.data.config.DataConfig +import nextflow.processor.TaskHandler +import nextflow.processor.TaskRun +import nextflow.script.params.FileOutParam +import nextflow.trace.TraceObserver +import nextflow.trace.TraceRecord +/** + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class CidObserver implements TraceObserver { + + private CidStore store + + @Override + void onFlowCreate(Session session) { + store = new DefaultCidStore() + store.open(DataConfig.create(session)) + } + + @Override + void onProcessComplete(TaskHandler handler, TraceRecord trace) { + storeTaskInfo(handler.task) + } + + void storeTaskInfo(TaskRun task) { + // store the task run entry + storeTaskRun(task) + // store all task outputs files + final outputs = task.getOutputsByType(FileOutParam) + for( Map.Entry entry : outputs ) { + final value = entry.value + if( value instanceof Path ) { + storeTaskOutput(task, (Path)value) + } + else if( value instanceof Collection ) { + for( Path it : value ) + storeTaskOutput(task, (Path)it) + } + } + } + + protected void storeTaskRun(TaskRun task) { + final value = new nextflow.data.cid.model.TaskRun( + 
task.id.value, + task.getName(), + task.hash.toString() ) + // store in the underlying persistence + final key = "${value.hash}/.task" + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + } + + protected void storeTaskOutput(TaskRun task, Path path) { + final attrs = readAttributes(path) + final rel = task.workDir.relativize(path).toString() + final key = "${task.hash}/${rel}" + final value = new TaskOutput( + "cid://$key", + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis() ) + // store in the underlying persistence + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + } + + protected BasicFileAttributes readAttributes(Path path) { + Files.readAttributes(path, BasicFileAttributes) + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy new file mode 100644 index 0000000000..6591e67b0c --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy @@ -0,0 +1,39 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid + +import java.util.function.Consumer + +import groovy.transform.CompileStatic +import nextflow.data.config.DataConfig +/** + * + * @author Paolo Di Tommaso + */ +@CompileStatic +interface CidStore { + + void open(DataConfig config) + + void save(String key, Object value) + + void list(String key, Consumer consumer) + + Object load(String key) + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy new file mode 100644 index 0000000000..3058a441e6 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -0,0 +1,67 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid + +import java.nio.file.Files +import java.nio.file.Path +import java.util.function.Consumer + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.data.config.DataConfig +import nextflow.exception.AbortOperationException + +/** + * + * @author Paolo Di Tommaso + */ +@Slf4j +@CompileStatic +class DefaultCidStore implements CidStore { + + private Path location + + void open(DataConfig config) { + location = config.store.location + if( !Files.exists(location) && !Files.createDirectories(location) ) { + throw new AbortOperationException("Unable to create CID store directory: $location") + } + } + + @Override + void save(String key, Object value) { + final path = location.resolve(key) + Files.createDirectories(path.parent) + log.debug "Save CID file path: $path" + path.text = value + } + + @Override + void list(String key, Consumer consumer) { + for( Path it : Files.walk(location.resolve(key)) ) { + final fileKey = location.relativize(it).toString() + consumer.accept(fileKey) + } + } + + @Override + Object load(String key) { + location.resolve(key).text + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy new file mode 100644 index 0000000000..701734c751 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy @@ -0,0 +1,35 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * + * @author Paolo Di Tommaso + */ +@Canonical +@CompileStatic +class TaskOutput { + String uri + long size + long createdAt + long modifiedAt + List annotations +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy new file mode 100644 index 0000000000..9c5c0d9b91 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy @@ -0,0 +1,34 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * + * @author Paolo Di Tommaso + */ +@Canonical +@CompileStatic +class TaskRun { + int id + String name + String hash + List annotations +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy new file mode 100644 index 0000000000..7d188ef9a9 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy @@ -0,0 +1,53 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.config + +import groovy.transform.CompileStatic +import nextflow.Global +import nextflow.Session + +/** + * Model workflow data config + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class DataConfig { + + final DataStoreOpts store + + DataConfig(Map opts) { + this.store = new DataStoreOpts(opts.store as Map ?: Map.of()) + } + + static DataConfig create(Session session) { + if( session ) { + return new DataConfig(session.config.navigate('workflow.data') as Map ?: Map.of()) + } + else + throw new IllegalStateException("Missing Nextflow session") + } + + static DataConfig create() { + create(getSession()) + } + + static private Session getSession() { + return Global.session as Session + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy b/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy new file mode 100644 index 0000000000..f5873251ef --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy @@ -0,0 +1,39 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.config + +import java.nio.file.Path + +import groovy.transform.CompileStatic +/** + * Model data store options + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class DataStoreOpts { + + final Path location + + DataStoreOpts(Map opts) { + this.location = opts.location + ? 
Path.of(opts.location as String) + : Path.of('.').toAbsolutePath().normalize().resolve('data') + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy index 3576b9fde7..fbd4784a05 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy @@ -38,6 +38,8 @@ class TaskId extends Number implements Comparable, Serializable, Cloneable { private final int value + int getValue() { value } + static TaskId of( value ) { if( value instanceof Integer ) return new TaskId(value) diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index 6c391625c9..4782a2d2f7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -3,6 +3,7 @@ package nextflow.trace import java.nio.file.Path import nextflow.Session +import nextflow.data.cid.CidObserver /** * Creates Nextflow observes object @@ -25,9 +26,14 @@ class DefaultObserverFactory implements TraceObserverFactory { createTimelineObserver(result) createDagObserver(result) createAnsiLogObserver(result) + createCidObserver(result) return result } + protected void createCidObserver(Collection result) { + result.add( new CidObserver() ) + } + protected void createAnsiLogObserver(Collection result) { if( session.ansiLog ) { session.ansiLogObserver = new AnsiLogObserver() diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy new file mode 100644 index 0000000000..fe06bb0bb7 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -0,0 +1,104 @@ +/* + * Copyright 2013-2024, 
Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid + +import java.nio.file.Files +import java.nio.file.attribute.BasicFileAttributes +import java.nio.file.attribute.FileTime +import java.time.Instant + +import com.google.common.hash.HashCode +import nextflow.Session +import nextflow.processor.TaskId +import nextflow.processor.TaskRun +import spock.lang.Specification +/** + * + * @author Paolo Di Tommaso + */ +class CidObserverTest extends Specification { + + def 'should save task run' () { + given: + def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + def session = Mock(Session) { getConfig()>>config } + def observer = new CidObserver() + observer.onFlowCreate(session) + and: + def hash = HashCode.fromInt(123456789) + and: + def task = Mock(TaskRun) { + getId() >> TaskId.of(100) + getName() >> 'foo' + getHash() >> hash + } + when: + observer.storeTaskRun(task) + then: + folder.resolve(hash.toString()).text == '{"id":100,"name":"foo","hash":"15cd5b07","annotations":null}' + + cleanup: + folder?.deleteDir() + } + + def 'should save task output' () { + given: + def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + def session = Mock(Session) { getConfig()>>config } + def observer = Spy(new CidObserver()) + observer.onFlowCreate(session) + and: + def workDir = 
folder.resolve('12/34567890') + Files.createDirectories(workDir) + and: + def outFile = workDir.resolve('foo/bar/file.bam') + Files.createDirectories(outFile.parent) + outFile.text = 'some data' + and: + def hash = HashCode.fromInt(123456789) + and: + def task = Mock(TaskRun) { + getId() >> TaskId.of(100) + getName() >> 'foo' + getHash() >> hash + getWorkDir() >> workDir + } + and: + def ts1 = Instant.ofEpochMilli(1737914400) + def ts2 = Instant.ofEpochMilli(1737914500) + def attrs = Mock(BasicFileAttributes) { + size() >> 100 + creationTime() >> FileTime.from(ts1) + lastModifiedTime() >> FileTime.from(ts2) + } + and: + observer.readAttributes(outFile) >> attrs + + when: + observer.storeTaskOutput(task, outFile) + then: + folder.resolve("${hash}/foo/bar/file.bam").text + == '{"uri":"cid://15cd5b07/foo/bar/file.bam","size":100,"createdAt":1737914400,"modifiedAt":1737914500,"annotations":null}' + + cleanup: + folder?.deleteDir() + } + +} diff --git a/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy new file mode 100644 index 0000000000..3c7ac18489 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy @@ -0,0 +1,42 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.config + +import java.nio.file.Path + +import spock.lang.Specification +/** + * + * @author Paolo Di Tommaso + */ +class DataConfigTest extends Specification { + + def 'should create default config' () { + when: + def config = new DataConfig(Map.of()) + then: + config.store.location == Path.of('.').resolve('data').toAbsolutePath().normalize() + } + + def 'should create data config' () { + when: + def config = new DataConfig(store: [location: "/some/data/store"]) + then: + config.store.location == Path.of("/some/data/store") + } +} diff --git a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy index 255d52bdd3..6f8f6fb777 100644 --- a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy +++ b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy @@ -494,6 +494,7 @@ class FilesEx { return true } catch(IOException e) { + log.debug "Failed to create directory '$self'", e return false } } From c0c660f9930e880dc8fd7f7ac6f5040bdc213d09 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Fri, 31 Jan 2025 12:17:33 +0100 Subject: [PATCH 05/72] Addressable data store [wip 2] [ci skip] Signed-off-by: Paolo Di Tommaso --- .../nextflow/data/cid/CidObserver.groovy | 11 ++++++-- .../nextflow/data/cid/DefaultCidStore.groovy | 2 +- .../nextflow/data/cid/model/DataType.groovy | 26 +++++++++++++++++++ .../nextflow/data/cid/model/TaskOutput.groovy | 2 ++ .../nextflow/data/cid/model/TaskRun.groovy | 1 + 5 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index 1a97d0350c..8934f6b04a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ 
b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -24,6 +24,7 @@ import java.nio.file.attribute.BasicFileAttributes import groovy.json.JsonOutput import groovy.transform.CompileStatic import nextflow.Session +import nextflow.data.cid.model.DataType import nextflow.data.cid.model.TaskOutput import nextflow.data.config.DataConfig import nextflow.processor.TaskHandler @@ -31,6 +32,8 @@ import nextflow.processor.TaskRun import nextflow.script.params.FileOutParam import nextflow.trace.TraceObserver import nextflow.trace.TraceRecord +import nextflow.util.CacheHelper + /** * * @author Paolo Di Tommaso @@ -70,20 +73,24 @@ class CidObserver implements TraceObserver { protected void storeTaskRun(TaskRun task) { final value = new nextflow.data.cid.model.TaskRun( + DataType.Task, task.id.value, task.getName(), task.hash.toString() ) // store in the underlying persistence - final key = "${value.hash}/.task" + final key = "${value.hash}/.data.json" store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) } protected void storeTaskOutput(TaskRun task, Path path) { final attrs = readAttributes(path) final rel = task.workDir.relativize(path).toString() - final key = "${task.hash}/${rel}" + final key = "${task.hash}/${rel}/.data.json" + final hash = CacheHelper.hasher(path).hash().toString() final value = new TaskOutput( + DataType.Output, "cid://$key", + hash, attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis() ) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy index 3058a441e6..ae6faaeceb 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -37,7 +37,7 @@ class DefaultCidStore implements CidStore { private Path location void open(DataConfig config) { - location = config.store.location + 
location = config.store.location.resolve('.meta') if( !Files.exists(location) && !Files.createDirectories(location) ) { throw new AbortOperationException("Unable to create CID store directory: $location") } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy new file mode 100644 index 0000000000..23cfc19d03 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy @@ -0,0 +1,26 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.model + +/** + * + * @author Paolo Di Tommaso + */ +enum DataType { + Task, Workflow, Output +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy index 701734c751..dd27ead4e0 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy @@ -27,7 +27,9 @@ import groovy.transform.CompileStatic @Canonical @CompileStatic class TaskOutput { + DataType type String uri + String hash long size long createdAt long modifiedAt diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy index 9c5c0d9b91..2b91df426c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy @@ -27,6 +27,7 @@ import groovy.transform.CompileStatic @Canonical @CompileStatic class TaskRun { + DataType type int id String name String hash From 2a2d76f2f05a968dcb7983f5e290b58cc5e8533d Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Fri, 31 Jan 2025 18:17:25 +0100 Subject: [PATCH 06/72] Minor changes [ci skip] Signed-off-by: Paolo Di Tommaso --- .../src/main/groovy/nextflow/data/cid/CidObserver.groovy | 7 +++++-- .../main/groovy/nextflow/data/cid/model/TaskOutput.groovy | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index 8934f6b04a..348c9ad1d5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -85,11 +85,14 @@ class CidObserver implements TraceObserver { protected void 
storeTaskOutput(TaskRun task, Path path) { final attrs = readAttributes(path) final rel = task.workDir.relativize(path).toString() - final key = "${task.hash}/${rel}/.data.json" + final cid = "${task.hash}/${rel}" + final uri = "cid://${cid}" + final key = "${cid}/.data.json" final hash = CacheHelper.hasher(path).hash().toString() final value = new TaskOutput( DataType.Output, - "cid://$key", + uri, + path.toUriString(), hash, attrs.size(), attrs.creationTime().toMillis(), diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy index dd27ead4e0..6467d36c6f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy @@ -29,6 +29,7 @@ import groovy.transform.CompileStatic class TaskOutput { DataType type String uri + String realPath String hash long size long createdAt From a2139e36d5e7355f79097257567214d62d193eb4 Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 12 Feb 2025 15:38:18 +0100 Subject: [PATCH 07/72] M0 implementation Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 30 +++++- .../nextflow/data/cid/CidObserver.groovy | 95 +++++++++++++++++-- .../groovy/nextflow/data/cid/CidStore.groovy | 4 + .../nextflow/data/cid/DefaultCidStore.groovy | 4 + .../nextflow/data/cid/model/DataType.groovy | 2 +- .../{TaskOutput.groovy => Output.groovy} | 6 +- .../nextflow/data/cid/model/TaskRun.groovy | 1 + .../nextflow/data/cid/model/Workflow.groovy | 36 +++++++ .../data/cid/model/WorkflowRun.groovy | 35 +++++++ .../nextflow/data/config/DataConfig.groovy | 2 +- .../nextflow/processor/PublishDir.groovy | 25 ++++- .../trace/DefaultObserverFactory.groovy | 4 +- .../src/main/nextflow/file/FileHelper.groovy | 24 ++++- 13 files changed, 252 insertions(+), 16 deletions(-) rename 
modules/nextflow/src/main/groovy/nextflow/data/cid/model/{TaskOutput.groovy => Output.groovy} (94%) create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 09152bc301..ca846bfcd8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -16,6 +16,8 @@ package nextflow +import nextflow.util.CacheHelper + import java.nio.file.Files import java.nio.file.Path import java.nio.file.Paths @@ -254,6 +256,14 @@ class Session implements ISession { private boolean statsEnabled + private volatile boolean cidEnabled + + boolean getCidEnabled() { cidEnabled } + + private HashCode executionHash + + String getExecutionHash() { executionHash } + private WorkflowMetadata workflowMetadata private WorkflowStatsObserver statsObserver @@ -393,6 +403,10 @@ class Session implements ISession { // -- file porter config this.filePorter = new FilePorter(this) + if (config.cid) { + this.cidEnabled = true + } + } protected Path cloudCachePath(Map cloudcache, Path workDir) { @@ -405,12 +419,27 @@ class Session implements ISession { } return result } + private HashCode generateExecutionHash(ScriptFile scriptFile){ + List keys = [generateScriptHash(scriptFile).toString(), scriptFile?.repository, scriptFile?.commitId, uniqueId, (Map)config.params] + return CacheHelper.hasher(keys).hash() + } + + private HashCode generateScriptHash(ScriptFile scriptFile){ + List keys = [ scriptFile?.scriptId ] + for( Path p : ScriptMeta.allScriptNames().values() ){ + keys << CacheHelper.hasher(p.text).hash().toString() + } + return CacheHelper.hasher(keys).hash() + } /** * Initialize the session workDir, libDir, baseDir and scriptName variables */ Session init( ScriptFile 
scriptFile, List args=null ) { + if(cidEnabled) { + this.executionHash = generateExecutionHash(scriptFile) + } if(!workDir.mkdirs()) throw new AbortOperationException("Cannot create work-dir: $workDir -- Make sure you have write permissions or specify a different directory by using the `-w` command line option") log.debug "Work-dir: ${workDir.toUriString()} [${FileHelper.getPathFsType(workDir)}]" @@ -439,7 +468,6 @@ class Session implements ISession { binding.setArgs( new ScriptRunner.ArgsList(args) ) cache = CacheFactory.create(uniqueId,runName).open() - return this } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index 348c9ad1d5..0e11526754 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -17,6 +17,11 @@ package nextflow.data.cid +import com.google.common.hash.HashCode +import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowRun +import nextflow.file.FileHelper + import java.nio.file.Files import java.nio.file.Path import java.nio.file.attribute.BasicFileAttributes @@ -25,7 +30,7 @@ import groovy.json.JsonOutput import groovy.transform.CompileStatic import nextflow.Session import nextflow.data.cid.model.DataType -import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.Output import nextflow.data.config.DataConfig import nextflow.processor.TaskHandler import nextflow.processor.TaskRun @@ -42,19 +47,43 @@ import nextflow.util.CacheHelper class CidObserver implements TraceObserver { private CidStore store + private Session session @Override void onFlowCreate(Session session) { + this.session = session store = new DefaultCidStore() store.open(DataConfig.create(session)) } + void onFlowBegin() { + storeWorkflowRun() + } + + protected void storeWorkflowRun() { + final workflow = new Workflow( + 
DataType.Workflow, + session.workflowMetadata.scriptFile.toString(), + session.workflowMetadata.scriptId.toString(), + session.workflowMetadata.repository, + session.workflowMetadata.commitId + ) + final value = new WorkflowRun( + DataType.WorkflowRun, + workflow, + session.uniqueId.toString(), + session.runName, + session.params + ) + final content = JsonOutput.prettyPrint(JsonOutput.toJson(value)) + store.save("${session.executionHash}/.data.json", content) + } @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { storeTaskInfo(handler.task) } - void storeTaskInfo(TaskRun task) { + protected void storeTaskInfo(TaskRun task) { // store the task run entry storeTaskRun(task) // store all task outputs files @@ -76,7 +105,9 @@ class CidObserver implements TraceObserver { DataType.Task, task.id.value, task.getName(), - task.hash.toString() ) + task.hash.toString(), + convertToReferences(task.inputFilesMap) + ) // store in the underlying persistence final key = "${value.hash}/.data.json" store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) @@ -86,14 +117,13 @@ class CidObserver implements TraceObserver { final attrs = readAttributes(path) final rel = task.workDir.relativize(path).toString() final cid = "${task.hash}/${rel}" - final uri = "cid://${cid}" final key = "${cid}/.data.json" final hash = CacheHelper.hasher(path).hash().toString() - final value = new TaskOutput( + final value = new Output( DataType.Output, - uri, - path.toUriString(), + path.toString(), hash, + "cid://$task.hash", attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis() ) @@ -104,4 +134,55 @@ class CidObserver implements TraceObserver { protected BasicFileAttributes readAttributes(Path path) { Files.readAttributes(path, BasicFileAttributes) } + + @Override + void onFilePublish(Path destination, Path source){ + final hash = CacheHelper.hasher(destination).hash().toString() + final rel = 
session.outputDir.relativize(destination).toString() + final key = "${rel}/.data.json" + final sourceReference = getSourceReference(source) + final attrs = readAttributes(destination) + final value = new Output( + DataType.Output, + destination.toString(), + hash, + sourceReference, + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis() ) + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + } + + String getSourceReference(Path source){ + final hash = FileHelper.getTaskHashFromPath(source, session.workDir) + if (hash) { + final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() + return "cid://$hash/$target" + } + return null + } + + @Override + void onFilePublish(Path destination){ + final hash = CacheHelper.hasher(destination).hash().toString() + final rel = session.outputDir.relativize(destination).toString() + final attrs = readAttributes(destination) + final value = new Output( + DataType.Output, + destination.toString(), + hash, + session.executionHash, + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis() ) + store.save(rel, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + } + + protected Map convertToReferences(Map inputs) { + Map references = new HashMap() + inputs.each { name, path -> + final ref = getSourceReference(path) + references.put(name, ref ? 
ref : path.toString())} + return references + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy index 6591e67b0c..67017f0bf0 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy @@ -17,6 +17,7 @@ package nextflow.data.cid +import java.nio.file.Path import java.util.function.Consumer import groovy.transform.CompileStatic @@ -36,4 +37,7 @@ interface CidStore { Object load(String key) + Path getPath() + + } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy index ae6faaeceb..1e7c22cec8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -64,4 +64,8 @@ class DefaultCidStore implements CidStore { location.resolve(key).text } + @Override + Path getPath(){ location } + + } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy index 23cfc19d03..ccacbb145d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy @@ -22,5 +22,5 @@ package nextflow.data.cid.model * @author Paolo Di Tommaso */ enum DataType { - Task, Workflow, Output + Task, Workflow, WorkflowRun, Output } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy similarity index 94% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy rename to modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy index 
6467d36c6f..738f843cc6 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskOutput.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy @@ -26,11 +26,11 @@ import groovy.transform.CompileStatic */ @Canonical @CompileStatic -class TaskOutput { +class Output { DataType type - String uri - String realPath + String path String hash + String source long size long createdAt long modifiedAt diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy index 2b91df426c..fea557ee08 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy @@ -31,5 +31,6 @@ class TaskRun { int id String name String hash + Map inputs List annotations } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy new file mode 100644 index 0000000000..a52ddbd814 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy @@ -0,0 +1,36 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + + +/** + * + * @author Jorge Ejarque = 2) { + final bucket = relativePath.getName(0).toString() + if (bucket.size() == 2) { + final strHash = bucket + relativePath.getName(1).toString() + try { + return HashCode.fromString(strHash) + } catch (Throwable e) { + log.debug("String '${strHash}' is not a valid hash", e) + } + } + } + } + return null + } } From fddc5f77b35d358ce0bfd72a7bce1a119c44993b Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 12 Feb 2025 16:58:04 +0100 Subject: [PATCH 08/72] fix tests Signed-off-by: jorgee --- .../nextflow/data/cid/CidObserver.groovy | 2 +- .../nextflow/processor/PublishDir.groovy | 2 +- .../nextflow/data/cid/CidObserverTest.groovy | 31 ++++++++++++------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index 0e11526754..ef1757e2ca 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -106,7 +106,7 @@ class CidObserver implements TraceObserver { task.id.value, task.getName(), task.hash.toString(), - convertToReferences(task.inputFilesMap) + task.inputFilesMap ? convertToReferences(task.inputFilesMap): null ) // store in the underlying persistence final key = "${value.hash}/.data.json" diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index fc82cb80e0..e36fa051b1 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -146,7 +146,7 @@ class PublishDir { final resolved = value instanceof Closure ? 
value.call() : value if( resolved instanceof String || resolved instanceof GString ) nullPathWarn = checkNull(resolved.toString()) - if( session.cidEnabled ){ + if( session?.cidEnabled ){ final resolvedPath = FileHelper.toPath(resolved) if (resolvedPath.isAbsolute()){ log.warn("CID store is enabled but publish dir is set to an absolute path ($resolvedPath). Outputs in this path will not published in the CID store") diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index fe06bb0bb7..233520c96a 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -17,6 +17,9 @@ package nextflow.data.cid +import groovy.json.JsonOutput +import nextflow.util.CacheHelper + import java.nio.file.Files import java.nio.file.attribute.BasicFileAttributes import java.nio.file.attribute.FileTime @@ -36,7 +39,7 @@ class CidObserverTest extends Specification { def 'should save task run' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[store:[location:folder.toString()]]]] + def config = [cid:[store:[location:folder.toString()]]] def session = Mock(Session) { getConfig()>>config } def observer = new CidObserver() observer.onFlowCreate(session) @@ -51,7 +54,7 @@ class CidObserverTest extends Specification { when: observer.storeTaskRun(task) then: - folder.resolve(hash.toString()).text == '{"id":100,"name":"foo","hash":"15cd5b07","annotations":null}' + folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint('{"type":"Task","id":100,"name":"foo","hash":"15cd5b07","inputs": null,"annotations":null}') cleanup: folder?.deleteDir() @@ -60,7 +63,7 @@ class CidObserverTest extends Specification { def 'should save task output' () { given: def folder = Files.createTempDirectory('test') - def config = 
[workflow:[data:[store:[location:folder.toString()]]]] + def config = [cid:[store:[location:folder.toString()]]] def session = Mock(Session) { getConfig()>>config } def observer = Spy(new CidObserver()) observer.onFlowCreate(session) @@ -71,6 +74,7 @@ class CidObserverTest extends Specification { def outFile = workDir.resolve('foo/bar/file.bam') Files.createDirectories(outFile.parent) outFile.text = 'some data' + def fileHash = CacheHelper.hasher(outFile).hash().toString() and: def hash = HashCode.fromInt(123456789) and: @@ -81,21 +85,24 @@ class CidObserverTest extends Specification { getWorkDir() >> workDir } and: - def ts1 = Instant.ofEpochMilli(1737914400) - def ts2 = Instant.ofEpochMilli(1737914500) - def attrs = Mock(BasicFileAttributes) { - size() >> 100 - creationTime() >> FileTime.from(ts1) - lastModifiedTime() >> FileTime.from(ts2) - } + def attrs = Files.readAttributes(outFile, BasicFileAttributes) + def expectedString = '{"type":"Output",' + + '"path":"' + outFile.toString() + '",' + + '"hash":"'+ fileHash + '",' + + '"source":"cid://15cd5b07",' + + '"size":'+attrs.size() + ',' + + '"createdAt":' + attrs.creationTime().toMillis() + ',' + + '"modifiedAt":'+ attrs.lastModifiedTime().toMillis() + ',' + + '"annotations":null}' + and: observer.readAttributes(outFile) >> attrs when: observer.storeTaskOutput(task, outFile) then: - folder.resolve("${hash}/foo/bar/file.bam").text - == '{"uri":"cid://15cd5b07/foo/bar/file.bam","size":100,"createdAt":1737914400,"modifiedAt":1737914500,"annotations":null}' + folder.resolve(".meta/${hash}/foo/bar/file.bam/.data.json").text + == JsonOutput.prettyPrint(expectedString) cleanup: folder?.deleteDir() From fe780a854148210ef519da565110fa5d71fbfef3 Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 12 Feb 2025 19:34:46 +0100 Subject: [PATCH 09/72] fix tests Signed-off-by: jorgee --- modules/nf-commons/src/main/nextflow/file/FileHelper.groovy | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy index db577b6e08..430222ade5 100644 --- a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy +++ b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy @@ -256,6 +256,9 @@ class FileHelper { } static Path toCanonicalPath(value) { + if( value==null ) + return null + Path result = toPath(value) if( result.fileSystem != FileSystems.default ) { From f9f7ed221777811051e9cefec1c48577ef997546 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 14 Feb 2025 14:16:14 +0100 Subject: [PATCH 10/72] first M1 updates Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 14 ++ .../main/groovy/nextflow/cli/CmdCid.groovy | 166 +++++++++++++++++- .../main/groovy/nextflow/cli/CmdLog.groovy | 1 + .../nextflow/data/cid/CidObserver.groovy | 25 +-- .../nextflow/data/cid/DefaultCidStore.groovy | 16 +- .../nextflow/data/cid/model/TaskRun.groovy | 1 - .../nextflow/data/cid/model/Workflow.groovy | 4 +- .../nextflow/processor/PublishDir.groovy | 15 +- .../nextflow/script/ScriptRunner.groovy | 3 +- .../groovy/nextflow/util/HistoryFile.groovy | 46 ++++- .../nextflow/dag/mermaid.dag.template.html | 2 +- 11 files changed, 250 insertions(+), 43 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index ca846bfcd8..78a7e9293b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -16,6 +16,9 @@ package nextflow +import nextflow.data.cid.CidStore +import nextflow.data.cid.DefaultCidStore +import nextflow.data.config.DataConfig import nextflow.util.CacheHelper import java.nio.file.Files @@ -262,6 +265,10 @@ class Session implements ISession { private HashCode executionHash + private CidStore cidStore + + CidStore getCidStore() { cidStore } + String getExecutionHash() { executionHash 
} private WorkflowMetadata workflowMetadata @@ -405,6 +412,8 @@ class Session implements ISession { if (config.cid) { this.cidEnabled = true + this.cidStore = new DefaultCidStore() + this.cidStore.open(DataConfig.create(this)) } } @@ -439,6 +448,11 @@ class Session implements ISession { if(cidEnabled) { this.executionHash = generateExecutionHash(scriptFile) + this.outputDir = cidStore.getPath().resolve(executionHash.toString()) + log.warn("CID store enabled. Defined output directory will be ignored and set to ${outputDir}.") + if( !HistoryFile.disabled() && HistoryFile.DEFAULT.exists() ) { + HistoryFile.DEFAULT.updateCidHash(runName,executionHash.toString()) + } } if(!workDir.mkdirs()) throw new AbortOperationException("Cannot create work-dir: $workDir -- Make sure you have write permissions or specify a different directory by using the `-w` command line option") log.debug "Work-dir: ${workDir.toUriString()} [${FileHelper.getPathFsType(workDir)}]" diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index d6d242fd6d..72d92bd0a5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -18,10 +18,24 @@ package nextflow.cli import com.beust.jcommander.Parameter +import groovy.json.JsonSlurper +import groovy.transform.Canonical import groovy.transform.CompileStatic +import nextflow.Session +import nextflow.config.ConfigBuilder +import nextflow.dag.MermaidHtmlRenderer +import nextflow.data.cid.CidStore +import nextflow.data.cid.DefaultCidStore +import nextflow.data.cid.model.DataType +import nextflow.data.config.DataConfig import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins +import java.nio.file.Path +import java.nio.file.Paths + +import static nextflow.data.cid.CidObserver.* + /** * * @author Paolo Di Tommaso @@ -33,13 +47,15 @@ class CmdCid extends CmdBase { interface SubCmd 
{ String getName() - void apply(List result) - void usage(List result) + void apply(List args) + void usage() } private List commands = new ArrayList<>() CmdCid() { + commands << new CmdShow() + commands << new CmdLineage() } @@ -75,4 +91,150 @@ class CmdCid extends CmdBase { msg += " -- Did you mean one of these?\n" + matches.collect { " $it"}.join('\n') throw new AbortOperationException(msg) } + + class CmdShow implements SubCmd{ + + @Override + String getName() { + return 'show' + } + + @Override + void apply(List args) { + if (args.size() != 1) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + final config = new ConfigBuilder() + .setOptions(getLauncher().getOptions()) + .setBaseDir(Paths.get('.')) + .build() + final session = new Session(config) + final store = session.cidStore + println store.load("${args[0]}/$METADATA_FILE").toString() + } + + @Override + void usage() { + println 'Usage: nextflow cid show ' + } + } + + + class CmdLineage implements SubCmd { + + @Canonical + class Edge { + String source + String destination + String label + } + + @Override + String getName() { 'lineage' } + + @Override + void apply(List args) { + if (args.size() != 2) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + try { + final config = new ConfigBuilder() + .setOptions(getLauncher().getOptions()) + .setBaseDir(Paths.get('.')) + .build() + final session = new Session(config) + final store = session.cidStore + final template = readTemplate() + final network = getLineage(store, args[0]) + Path file = Path.of(args[1]) + file.text = template.replace('REPLACE_WITH_NETWORK_DATA', network) + println("Linage graph for ${args[0]} rendered in ${args[1]}") + } catch (Throwable e) { + println("ERROR: rendering lineage graph. 
${e.getLocalizedMessage()}") + } + } + + private String getLineage(CidStore store, String dataCid) { + def lines = [] as List + lines << "flowchart BT".toString() + + final nodesToRender = new LinkedList() + nodesToRender.add(dataCid) + final edgesToRender = new LinkedList() + while (!nodesToRender.isEmpty()) { + final node = nodesToRender.removeFirst() + processNode(lines, node, nodesToRender, edgesToRender, store) + } + lines << "" + edgesToRender.each { lines << " ${it.source} -->${it.destination}".toString() } + lines << "" + return lines.join('\n') + } + + private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, CidStore store) { + final slurper = new JsonSlurper() + final cidObject = slurper.parse(store.load("$nodeToRender/$METADATA_FILE").toString().toCharArray()) as Map + switch (DataType.valueOf(cidObject.type as String)) { + case DataType.Output: + lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); + final source = cidObject.source as String + if (source) { + if (source.startsWith(CID_PROT)) { + final cid = source.substring(CID_PROT.size()) + nodes.add(cid) + edges.add(new Edge(cid, nodeToRender)) + } else { + lines << " ${source}@{shape: document, label: \"${source}\"}".toString(); + edges.add(new Edge(source, nodeToRender)) + } + } + + break; + case DataType.WorkflowRun: + lines << "${nodeToRender}@{shape: processes, label: \"${cidObject.runName}\"}".toString() + final parameters = cidObject.params as Map + parameters.values().each { + lines << " ${it}@{shape: document, label: \"${it}\"}".toString(); + edges.add(new Edge(it.toString(), nodeToRender)) + } + break; + case DataType.Task: + lines << " ${nodeToRender}@{shape: process, label: \"${cidObject.name}\"}".toString() + final parameters = cidObject.inputs as Map + parameters.values().each { String source -> + if (source.startsWith(CID_PROT)) { + final cid = source.substring(CID_PROT.size()) + nodes.add(cid) + edges.add(new 
Edge(cid, nodeToRender)) + } else { + lines << " ${source}@{shape: document, label: \"${source}\"}".toString(); + edges.add(new Edge(source, nodeToRender)) + } + } + break; + default: + throw new Exception("Unrecognized type reference ${cidObject.type}") + } + } + + private String readTemplate() { + final writer = new StringWriter() + final res = MermaidHtmlRenderer.class.getResourceAsStream('mermaid.dag.template.html') + int ch + while( (ch=res.read()) != -1 ) { + writer.append(ch as char) + } + writer.toString() + } + + @Override + void usage() { + println 'Usage: nextflow cid lineage ' + } + + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy index 66d88980d1..0c6d4356f3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy @@ -203,6 +203,7 @@ class CmdLog extends CmdBase implements CacheBase { .head('STATUS') .head('REVISION ID') .head('SESSION ID') + .head('CID HASH') .head('COMMAND') history.eachRow { List row -> diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index ef1757e2ca..cf739b96a1 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -21,6 +21,7 @@ import com.google.common.hash.HashCode import nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowRun import nextflow.file.FileHelper +import nextflow.script.ScriptMeta import java.nio.file.Files import java.nio.file.Path @@ -31,7 +32,6 @@ import groovy.transform.CompileStatic import nextflow.Session import nextflow.data.cid.model.DataType import nextflow.data.cid.model.Output -import nextflow.data.config.DataConfig import nextflow.processor.TaskHandler import nextflow.processor.TaskRun import 
nextflow.script.params.FileOutParam @@ -45,15 +45,15 @@ import nextflow.util.CacheHelper */ @CompileStatic class CidObserver implements TraceObserver { - + public static final String METADATA_FILE = '.data.json' + public static final String CID_PROT = 'cid://' private CidStore store private Session session @Override void onFlowCreate(Session session) { this.session = session - store = new DefaultCidStore() - store.open(DataConfig.create(session)) + this.store = session.cidStore } void onFlowBegin() { @@ -64,7 +64,7 @@ class CidObserver implements TraceObserver { final workflow = new Workflow( DataType.Workflow, session.workflowMetadata.scriptFile.toString(), - session.workflowMetadata.scriptId.toString(), + ScriptMeta.allScriptNames().values().collect { it.toString()}, session.workflowMetadata.repository, session.workflowMetadata.commitId ) @@ -76,7 +76,7 @@ class CidObserver implements TraceObserver { session.params ) final content = JsonOutput.prettyPrint(JsonOutput.toJson(value)) - store.save("${session.executionHash}/.data.json", content) + store.save("${session.executionHash}/$METADATA_FILE", content) } @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { @@ -109,7 +109,7 @@ class CidObserver implements TraceObserver { task.inputFilesMap ? 
convertToReferences(task.inputFilesMap): null ) // store in the underlying persistence - final key = "${value.hash}/.data.json" + final key = "${value.hash}/$METADATA_FILE" store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) } @@ -117,13 +117,13 @@ class CidObserver implements TraceObserver { final attrs = readAttributes(path) final rel = task.workDir.relativize(path).toString() final cid = "${task.hash}/${rel}" - final key = "${cid}/.data.json" + final key = "${cid}/$METADATA_FILE" final hash = CacheHelper.hasher(path).hash().toString() final value = new Output( DataType.Output, path.toString(), hash, - "cid://$task.hash", + "$CID_PROT$task.hash", attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis() ) @@ -139,7 +139,7 @@ class CidObserver implements TraceObserver { void onFilePublish(Path destination, Path source){ final hash = CacheHelper.hasher(destination).hash().toString() final rel = session.outputDir.relativize(destination).toString() - final key = "${rel}/.data.json" + final key = "$session.executionHash/${rel}/$METADATA_FILE" final sourceReference = getSourceReference(source) final attrs = readAttributes(destination) final value = new Output( @@ -157,7 +157,7 @@ class CidObserver implements TraceObserver { final hash = FileHelper.getTaskHashFromPath(source, session.workDir) if (hash) { final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() - return "cid://$hash/$target" + return "$CID_PROT$hash/$target" } return null } @@ -166,6 +166,7 @@ class CidObserver implements TraceObserver { void onFilePublish(Path destination){ final hash = CacheHelper.hasher(destination).hash().toString() final rel = session.outputDir.relativize(destination).toString() + final key = "$session.executionHash/${rel}/$METADATA_FILE" final attrs = readAttributes(destination) final value = new Output( DataType.Output, @@ -175,7 +176,7 @@ class CidObserver implements TraceObserver { attrs.size(), 
attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis() ) - store.save(rel, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) } protected Map convertToReferences(Map inputs) { diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy index 1e7c22cec8..958cc4ef49 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -34,18 +34,20 @@ import nextflow.exception.AbortOperationException @CompileStatic class DefaultCidStore implements CidStore { + private Path metaLocation private Path location void open(DataConfig config) { - location = config.store.location.resolve('.meta') - if( !Files.exists(location) && !Files.createDirectories(location) ) { - throw new AbortOperationException("Unable to create CID store directory: $location") + location = config.store.location + metaLocation = location.resolve('.meta') + if( !Files.exists(metaLocation) && !Files.createDirectories(metaLocation) ) { + throw new AbortOperationException("Unable to create CID store directory: $metaLocation") } } @Override void save(String key, Object value) { - final path = location.resolve(key) + final path = metaLocation.resolve(key) Files.createDirectories(path.parent) log.debug "Save CID file path: $path" path.text = value @@ -53,15 +55,15 @@ class DefaultCidStore implements CidStore { @Override void list(String key, Consumer consumer) { - for( Path it : Files.walk(location.resolve(key)) ) { - final fileKey = location.relativize(it).toString() + for( Path it : Files.walk(metaLocation.resolve(key)) ) { + final fileKey = metaLocation.relativize(it).toString() consumer.accept(fileKey) } } @Override Object load(String key) { - location.resolve(key).text + metaLocation.resolve(key).text } @Override diff --git 
a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy index fea557ee08..22318cdeda 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy @@ -30,7 +30,6 @@ class TaskRun { DataType type int id String name - String hash Map inputs List annotations } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy index a52ddbd814..643af9ec7e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy @@ -29,8 +29,8 @@ import groovy.transform.CompileStatic @CompileStatic class Workflow { DataType type - String scriptFile - String scriptId + String mainScriptFile + List otherScriptFiles String repository String commitId } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index e36fa051b1..fce784b543 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -149,11 +149,11 @@ class PublishDir { if( session?.cidEnabled ){ final resolvedPath = FileHelper.toPath(resolved) if (resolvedPath.isAbsolute()){ - log.warn("CID store is enabled but publish dir is set to an absolute path ($resolvedPath). Outputs in this path will not published in the CID store") + log.warn("CID store is enabled but 'publishDir' is set to an absolute path ($resolvedPath). 
Outputs in this path will not published in the CID store") this.path = FileHelper.toCanonicalPath(resolved) } else{ - this.path = session.outputDir.resolve(session.executionHash).resolve(resolvedPath) + this.path = session.outputDir.resolve(resolvedPath) } } else { @@ -385,17 +385,6 @@ class PublishDir { throw new IllegalArgumentException("Not a valid publish target path: `$target` [${target?.class?.name}]") } - private Path resolveRelative(String target){ - //If comes from a task - if (session.cidEnabled && sourceDir && sourceDir.startsWith(session.workDir)){ - log.debug("Must add taskhash") - String taskHash = FileHelper.getTaskHashFromPath(sourceDir, session.workDir) - if( taskHash ) - return path.resolve(Path.of(taskHash, target.toString())) - } - return path.resolve(target) - } - protected void safeProcessFile(Path source, Path target) { try { retryableProcessFile(source, target) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy b/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy index 498d50f41f..1ac700e44f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy @@ -284,7 +284,8 @@ class ScriptRunner { } def revisionId = scriptFile.commitId ?: scriptFile.scriptId - HistoryFile.DEFAULT.write( name, session.uniqueId, revisionId, cli ) + def executionHash = session.executionHash ?: '-' + HistoryFile.DEFAULT.write( name, session.uniqueId, revisionId, executionHash, cli ) } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy b/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy index 15d5cb83ca..92a8e93cbe 100644 --- a/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy @@ -61,14 +61,14 @@ class HistoryFile extends File { super(file.toString()) } - void write( String name, UUID key, String 
revisionId, args, Date date = null ) { + void write( String name, UUID key, String revisionId, String cidHash, args, Date date = null ) { assert key assert args != null withFileLock { def timestamp = date ?: new Date() def value = args instanceof Collection ? args.join(' ') : args - this << new Record(timestamp: timestamp, runName: name, revisionId: revisionId, sessionId: key, command: value).toString() << '\n' + this << new Record(timestamp: timestamp, runName: name, revisionId: revisionId, sessionId: key, cidHash: cidHash, command: value).toString() << '\n' } } @@ -350,6 +350,41 @@ class HistoryFile extends File { } + void updateCidHash(String name, String hashCode) { + assert name + assert hashCode + try { + withFileLock {updateCidHash0(name, hashCode) } + } + catch( Throwable e ) { + log.warn "Can't update history file: $this",e + } + } + + private void updateCidHash0(String name, String hashCode){ + def newHistory = new StringBuilder() + + this.readLines().each { line -> + try { + def current = line ? 
Record.parse(line) : null + if( current?.runName == name ) { + current.cidHash = hashCode + newHistory << current.toString() << '\n' + } + else { + newHistory << line << '\n' + } + } + catch( IllegalArgumentException e ) { + log.warn("Can't read history file: $this", e) + } + } + + // rewrite the history content + this.setText(newHistory.toString()) + } + + @EqualsAndHashCode(includes = 'runName,sessionId') static class Record { Date timestamp @@ -358,6 +393,7 @@ class HistoryFile extends File { String status String revisionId UUID sessionId + String cidHash String command Record(String sessionId, String name=null) { @@ -380,6 +416,7 @@ class HistoryFile extends File { line << (status ?: '-') line << (revisionId ?: '-') line << (sessionId.toString()) + line << (cidHash ?: '-') line << (command ?: '-') } @@ -393,7 +430,7 @@ class HistoryFile extends File { if( cols.size() == 2 ) return new Record(cols[0]) - if( cols.size()==7 ) { + if( cols.size()== 8 ) { return new Record( timestamp: TIMESTAMP_FMT.parse(cols[0]), @@ -402,7 +439,8 @@ class HistoryFile extends File { status: cols[3] && cols[3] != '-' ? 
cols[3] : null, revisionId: cols[4], sessionId: UUID.fromString(cols[5]), - command: cols[6] + cidHash: cols[6], + command: cols[7] ) } diff --git a/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html b/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html index 0ab1d9475e..ebbf8e834a 100644 --- a/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html +++ b/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html @@ -36,7 +36,7 @@ REPLACE_WITH_NETWORK_DATA From 0c2492e5b742a88eb87a11995cd634e5a79f74d1 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 14 Feb 2025 14:57:15 +0100 Subject: [PATCH 11/72] fix tests Signed-off-by: jorgee --- .../nextflow/data/cid/CidObserver.groovy | 3 +- .../groovy/nextflow/cli/CmdLogTest.groovy | 4 +- .../nextflow/data/cid/CidObserverTest.groovy | 17 ++++- .../nextflow/util/HistoryFileTest.groovy | 76 ++++++++++++------- 4 files changed, 67 insertions(+), 33 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index cf739b96a1..a313e925e9 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -105,11 +105,10 @@ class CidObserver implements TraceObserver { DataType.Task, task.id.value, task.getName(), - task.hash.toString(), task.inputFilesMap ? 
convertToReferences(task.inputFilesMap): null ) // store in the underlying persistence - final key = "${value.hash}/$METADATA_FILE" + final key = "${task.hash}/$METADATA_FILE" store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) } diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy index 8deff84359..56aced51b9 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy @@ -97,7 +97,7 @@ class CmdLogTest extends Specification { cache.close() def history = new HistoryFile(folder.resolve(HistoryFile.defaultFileName())) - history.write(runName,uuid,'b3d3aca8eb','run') + history.write(runName,uuid,'b3d3aca8eb','-','run') when: def log = new CmdLog(basePath: folder, args: [runName]) @@ -167,7 +167,7 @@ class CmdLogTest extends Specification { cache.close() def history = new HistoryFile(folder.resolve(HistoryFile.defaultFileName())) - history.write(runName,uuid,'b3d3aca8eb','run') + history.write(runName,uuid,'b3d3aca8eb','-','run') when: diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index 233520c96a..80c3295f5d 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -18,6 +18,7 @@ package nextflow.data.cid import groovy.json.JsonOutput +import nextflow.data.config.DataConfig import nextflow.util.CacheHelper import java.nio.file.Files @@ -40,7 +41,12 @@ class CidObserverTest extends Specification { given: def folder = Files.createTempDirectory('test') def config = [cid:[store:[location:folder.toString()]]] - def session = Mock(Session) { getConfig()>>config } + def store = new DefaultCidStore(); + def session = Mock(Session) { + getConfig()>>config + 
getCidStore()>>store + } + store.open(DataConfig.create(session)) def observer = new CidObserver() observer.onFlowCreate(session) and: @@ -54,7 +60,7 @@ class CidObserverTest extends Specification { when: observer.storeTaskRun(task) then: - folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint('{"type":"Task","id":100,"name":"foo","hash":"15cd5b07","inputs": null,"annotations":null}') + folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint('{"type":"Task","id":100,"name":"foo","inputs": null,"annotations":null}') cleanup: folder?.deleteDir() @@ -64,7 +70,12 @@ class CidObserverTest extends Specification { given: def folder = Files.createTempDirectory('test') def config = [cid:[store:[location:folder.toString()]]] - def session = Mock(Session) { getConfig()>>config } + def store = new DefaultCidStore(); + def session = Mock(Session) { + getConfig()>>config + getCidStore()>>store + } + store.open(DataConfig.create(session)) def observer = Spy(new CidObserver()) observer.onFlowCreate(session) and: diff --git a/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy b/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy index 4233f744b1..c867304d70 100644 --- a/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy @@ -32,10 +32,10 @@ class HistoryFileTest extends Specification { b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sample.fa b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sample.fa -resume 58d8dd16-ce77-4507-ba1a-ec1ccc9bd2e8\tnextflow run examples/basic.nf --in data/sample.fa -2016-07-24 16:43:16\t-\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -2016-07-24 
16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello +2016-07-24 16:43:16\t-\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' def 'should support custom base dir' () { @@ -66,9 +66,9 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa def d1 = new Date(now - 50_000) def d2 = new Date(now - 30_000) def d3 = new Date(now - 10_000) - history.write( 'hello_world', id1, 'abc', [1,2,3], d1 ) - history.write( 'super_star', id2, '123', [1,2,3], d2 ) - history.write( 'slow_food', id3, 'xyz', [1,2,3], d3 ) + history.write( 'hello_world', id1, 'abc', '-', [1,2,3], d1 ) + history.write( 'super_star', id2, '123', '-', [1,2,3], d2 ) + history.write( 'slow_food', id3, 'xyz', '-', [1,2,3], d3 ) then: history.getLast() == new HistoryRecord(sessionId: id3, runName: 'slow_food', timestamp: d3, command: '1 2 3') @@ -243,9 +243,9 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa then: history.text == ''' 58d8dd16-ce77-4507-ba1a-ec1ccc9bd2e8\tnextflow run examples/basic.nf --in data/sample.fa - 2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello - 2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume - 2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello + 
2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello + 2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume + 2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' .stripIndent() } @@ -306,14 +306,38 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.findAllRunNames() == ['evil_pike', 'gigantic_keller', 'small_cirum', 'modest_bartik'] as Set } + def 'should update cid hash ' () { + given: + def source = ''' +2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +''' + def file = Files.createTempFile('test',null) + file.deleteOnExit() + file.text = source + def history = new HistoryFile(file) + + + when: + history.updateCidHash('evil_pike','cid_hash') + then: + history.text == ''' +2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\tcid_hash\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +''' + } def 'should update the history entries ' () { given: def source = ''' -2016-07-24 
16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello +2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' def file = Files.createTempFile('test',null) file.deleteOnExit() @@ -326,10 +350,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('evil_pike',true,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 
09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' when: @@ -337,10 +361,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('small_cirum',false,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' when: @@ -348,10 +372,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('gigantic_keller',true,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello -2016-07-24 16:43:34\t16s\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow 
run hello +2016-07-24 16:43:34\t16s\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello +2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello ''' } From 41ac817f87ded4dd6e2fcf2f7734dafc7cbd82a5 Mon Sep 17 00:00:00 2001 From: jorgee Date: Mon, 17 Feb 2025 18:15:43 +0100 Subject: [PATCH 12/72] update descriptions Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdCid.groovy | 9 +- .../nextflow/data/cid/CidObserver.groovy | 58 +++++++++--- .../nextflow/data/cid/model/DataType.groovy | 2 +- .../nextflow/data/cid/model/Output.groovy | 2 +- .../nextflow/data/cid/model/TaskRun.groovy | 11 ++- .../data/cid/model/WorkflowRun.groovy | 2 +- .../nextflow/processor/TaskProcessor.groovy | 4 +- .../groovy/nextflow/processor/TaskRun.groovy | 4 + .../nextflow/util/PathNormalizer.groovy | 93 +++++++++++++++++++ 9 files changed, 159 insertions(+), 26 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index 72d92bd0a5..a27bfdfec7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -179,7 +179,8 @@ class CmdCid extends CmdBase { final slurper = new JsonSlurper() final cidObject = slurper.parse(store.load("$nodeToRender/$METADATA_FILE").toString().toCharArray()) as Map switch (DataType.valueOf(cidObject.type as String)) { - case DataType.Output: + case DataType.TaskOutput: + case DataType.WorkflowOutput: lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); final source = cidObject.source as String if (source) { @@ -202,10 +203,10 @@ class CmdCid 
extends CmdBase { edges.add(new Edge(it.toString(), nodeToRender)) } break; - case DataType.Task: + case DataType.TaskRun: lines << " ${nodeToRender}@{shape: process, label: \"${cidObject.name}\"}".toString() - final parameters = cidObject.inputs as Map - parameters.values().each { String source -> + final parameters = cidObject.inputs as List + parameters.each { String source -> if (source.startsWith(CID_PROT)) { final cid = source.substring(CID_PROT.size()) nodes.add(cid) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index a313e925e9..f59b582f60 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -17,11 +17,12 @@ package nextflow.data.cid -import com.google.common.hash.HashCode +import groovy.util.logging.Slf4j import nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowRun import nextflow.file.FileHelper import nextflow.script.ScriptMeta +import nextflow.util.PathNormalizer import java.nio.file.Files import java.nio.file.Path @@ -43,6 +44,7 @@ import nextflow.util.CacheHelper * * @author Paolo Di Tommaso */ +@Slf4j @CompileStatic class CidObserver implements TraceObserver { public static final String METADATA_FILE = '.data.json' @@ -61,10 +63,12 @@ class CidObserver implements TraceObserver { } protected void storeWorkflowRun() { + final normalizer = new PathNormalizer(session.workflowMetadata) + final mainScript = normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()) final workflow = new Workflow( DataType.Workflow, - session.workflowMetadata.scriptFile.toString(), - ScriptMeta.allScriptNames().values().collect { it.toString()}, + mainScript, + ScriptMeta.allScriptNames().values().collect {normalizer.normalizePath(it.normalize())}, session.workflowMetadata.repository, session.workflowMetadata.commitId ) @@ 
-73,19 +77,36 @@ class CidObserver implements TraceObserver { workflow, session.uniqueId.toString(), session.runName, - session.params + getNormalizedParams(session.params, normalizer) ) final content = JsonOutput.prettyPrint(JsonOutput.toJson(value)) store.save("${session.executionHash}/$METADATA_FILE", content) } + + private static Map getNormalizedParams(Map params, PathNormalizer normalizer){ + final normalizedParams = new HashMap() + params.each{String key, Object value -> + log.debug("Managing parameter $key , class ${value.class}") + if (value instanceof Path) + normalizedParams.put(key,normalizer.normalizePath(value as Path)) + else if (value instanceof String || value instanceof GString) + normalizedParams.put(key,normalizer.normalizePath(value.toString())) + else + normalizedParams.put(key, value) + } + return normalizedParams + } + + @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { storeTaskInfo(handler.task) } protected void storeTaskInfo(TaskRun task) { + final pathNormalizer = new PathNormalizer(session.workflowMetadata) // store the task run entry - storeTaskRun(task) + storeTaskRun(task, pathNormalizer) // store all task outputs files final outputs = task.getOutputsByType(FileOutParam) for( Map.Entry entry : outputs ) { @@ -100,12 +121,19 @@ class CidObserver implements TraceObserver { } } - protected void storeTaskRun(TaskRun task) { + protected void storeTaskRun(TaskRun task, PathNormalizer normalizer) { final value = new nextflow.data.cid.model.TaskRun( - DataType.Task, - task.id.value, + DataType.TaskRun, + session.uniqueId.toString(), task.getName(), - task.inputFilesMap ? convertToReferences(task.inputFilesMap): null + session.stubRun ? task.stubSource: task.source, + task.inputFilesMap ? convertToReferences(task.inputFilesMap, normalizer): null, + task.isContainerEnabled() ? 
task.getContainerFingerprint(): null, + normalizer.normalizePath(task.getCondaEnv()), + normalizer.normalizePath(task.getSpackEnv()), + task.config?.getArchitecture()?.toString(), + task.processor.getTaskGlobalVars(task), + task.processor.getTaskBinEntries(task.source).collect { Path p -> normalizer.normalizePath(p.normalize()) } ) // store in the underlying persistence final key = "${task.hash}/$METADATA_FILE" @@ -119,7 +147,7 @@ class CidObserver implements TraceObserver { final key = "${cid}/$METADATA_FILE" final hash = CacheHelper.hasher(path).hash().toString() final value = new Output( - DataType.Output, + DataType.TaskOutput, path.toString(), hash, "$CID_PROT$task.hash", @@ -142,7 +170,7 @@ class CidObserver implements TraceObserver { final sourceReference = getSourceReference(source) final attrs = readAttributes(destination) final value = new Output( - DataType.Output, + DataType.WorkflowOutput, destination.toString(), hash, sourceReference, @@ -168,7 +196,7 @@ class CidObserver implements TraceObserver { final key = "$session.executionHash/${rel}/$METADATA_FILE" final attrs = readAttributes(destination) final value = new Output( - DataType.Output, + DataType.WorkflowOutput, destination.toString(), hash, session.executionHash, @@ -178,11 +206,11 @@ class CidObserver implements TraceObserver { store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) } - protected Map convertToReferences(Map inputs) { - Map references = new HashMap() + protected List convertToReferences(Map inputs, PathNormalizer normalizer) { + List references = new LinkedList() inputs.each { name, path -> final ref = getSourceReference(path) - references.put(name, ref ? ref : path.toString())} + references.add(ref ? 
ref : normalizer.normalizePath(path))} return references } } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy index ccacbb145d..955f131e9e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy @@ -22,5 +22,5 @@ package nextflow.data.cid.model * @author Paolo Di Tommaso */ enum DataType { - Task, Workflow, WorkflowRun, Output + TaskRun, Workflow, WorkflowRun, TaskOutput, WorkflowOutput } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy index 738f843cc6..610168b129 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy @@ -29,7 +29,7 @@ import groovy.transform.CompileStatic class Output { DataType type String path - String hash + String checksum String source long size long createdAt diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy index 22318cdeda..857f2ee099 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy @@ -28,8 +28,15 @@ import groovy.transform.CompileStatic @CompileStatic class TaskRun { DataType type - int id + String sessionId String name - Map inputs + String source + List inputs + String container + String conda + String spack + String architecture + Map globalVars + List binEntries List annotations } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy index cdc67e50ba..e99cdd8425 100644 --- 
a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy @@ -29,7 +29,7 @@ import groovy.transform.CompileStatic class WorkflowRun { DataType type Workflow workflow - String uniqueId + String sessionId String name Map params } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 05a668c82e..b0bf67aaca 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -2274,7 +2274,7 @@ class TaskProcessor { * @return The list of paths of scripts in the project bin folder referenced in the task command */ @Memoized - protected List getTaskBinEntries(String script) { + public List getTaskBinEntries(String script) { List result = [] def tokenizer = new StringTokenizer(script," \t\n\r\f()[]{};&|<>`") while( tokenizer.hasMoreTokens() ) { @@ -2307,7 +2307,7 @@ class TaskProcessor { log.info(buffer.toString()) } - protected Map getTaskGlobalVars(TaskRun task) { + public Map getTaskGlobalVars(TaskRun task) { final result = task.getGlobalVars(ownerScript.binding) final directives = getTaskExtensionDirectiveVars(task) result.putAll(directives) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy index bde46722f1..df3395d9e8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy @@ -979,5 +979,9 @@ class TaskRun implements Cloneable { CondaConfig getCondaConfig() { return processor.session.getCondaConfig() } + + String getStubSource(){ + return config?.getStubBlock()?.source + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy 
b/modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy new file mode 100644 index 0000000000..7da3c5a925 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy @@ -0,0 +1,93 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.util + +import groovy.transform.CompileStatic +import nextflow.script.WorkflowMetadata + +import java.nio.file.Path + +/** + * + * @author Ben Sherman + */ +@CompileStatic +class PathNormalizer { + + private URL repository + + private String commitId + + private String projectDir + + private String workDir + + PathNormalizer(WorkflowMetadata metadata) { + repository = metadata.repository ? new URL(metadata.repository) : null + commitId = metadata.commitId + projectDir = metadata.projectDir.normalize().toUriString() + workDir = metadata.workDir.normalize().toUriString() + } + + /** + * Normalize paths against the original remote URL, or + * work directory, where appropriate. 
+ * + * @param path + */ + String normalizePath(Path path) { + normalizePath(path.toUriString()) + } + + String normalizePath(String path) { + if(!path) + return null + // replace work directory with relative path + if( path.startsWith(workDir) ) + return path.replace(workDir, 'work') + + // replace project directory with source URL (if applicable) + if( repository && path.startsWith(projectDir) ) + return getProjectSourceUrl(path) + + // encode local absolute paths as file URLs + if( path.startsWith('/') ) + return 'file://' + path + + return path + } + + /** + * Get the source URL for a project asset. + * + * @param path + */ + private String getProjectSourceUrl(String path) { + switch( repository.host ) { + case 'bitbucket.org': + return path.replace(projectDir, "${repository}/src/${commitId}") + case 'github.com': + return path.replace(projectDir, "${repository}/tree/${commitId}") + case 'gitlab.com': + return path.replace(projectDir, "${repository}/-/tree/${commitId}") + default: + return path + } + } + +} From cdc31163d021cd29a381f38d76f537e3ae252ad3 Mon Sep 17 00:00:00 2001 From: jorgee Date: Mon, 17 Feb 2025 18:53:31 +0100 Subject: [PATCH 13/72] fix test Signed-off-by: jorgee --- .../nextflow/data/cid/CidObserverTest.groovy | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index 80c3295f5d..a5c1a3c426 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -19,9 +19,12 @@ package nextflow.data.cid import groovy.json.JsonOutput import nextflow.data.config.DataConfig +import nextflow.processor.TaskProcessor import nextflow.util.CacheHelper +import nextflow.util.PathNormalizer import java.nio.file.Files +import java.nio.file.Path import 
java.nio.file.attribute.BasicFileAttributes import java.nio.file.attribute.FileTime import java.time.Instant @@ -42,9 +45,11 @@ class CidObserverTest extends Specification { def folder = Files.createTempDirectory('test') def config = [cid:[store:[location:folder.toString()]]] def store = new DefaultCidStore(); + def uniqueId = UUID.randomUUID() def session = Mock(Session) { getConfig()>>config getCidStore()>>store + getUniqueId()>>uniqueId } store.open(DataConfig.create(session)) def observer = new CidObserver() @@ -52,15 +57,31 @@ class CidObserverTest extends Specification { and: def hash = HashCode.fromInt(123456789) and: + def processor = Mock(TaskProcessor){ + getTaskGlobalVars(_) >> [:] + getTaskBinEntries(_) >> [] + } def task = Mock(TaskRun) { getId() >> TaskId.of(100) getName() >> 'foo' getHash() >> hash + getProcessor() >> processor + getSource() >> 'echo task source' + } + def normalizer = Mock(PathNormalizer.class) { + normalizePath( _ as Path) >> {Path p -> p?.toString()} + normalizePath( _ as String) >> {String p -> p} } + def expectedString = '{"type":"TaskRun",' + + '"sessionId":"'+uniqueId.toString() + '",' + + '"name":"foo","source":"echo task source",' + + '"inputs": null,"container": null,"conda": null,' + + '"spack": null,"architecture": null,' + + '"globalVars": {},"binEntries": [],"annotations":null}' when: - observer.storeTaskRun(task) + observer.storeTaskRun(task, normalizer) then: - folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint('{"type":"Task","id":100,"name":"foo","inputs": null,"annotations":null}') + folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint(expectedString) cleanup: folder?.deleteDir() @@ -97,9 +118,9 @@ class CidObserverTest extends Specification { } and: def attrs = Files.readAttributes(outFile, BasicFileAttributes) - def expectedString = '{"type":"Output",' + + def expectedString = '{"type":"TaskOutput",' + '"path":"' + outFile.toString() + '",' + - 
'"hash":"'+ fileHash + '",' + + '"checksum":"'+ fileHash + '",' + '"source":"cid://15cd5b07",' + '"size":'+attrs.size() + ',' + '"createdAt":' + attrs.creationTime().toMillis() + ',' + From 82b1ccd140912abd47c1c3abb523acafa189ed26 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 27 Feb 2025 14:21:56 +0100 Subject: [PATCH 14/72] First commit to M1 implementation Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 36 +- .../main/groovy/nextflow/cli/CmdCid.groovy | 134 ++++-- .../main/groovy/nextflow/cli/CmdLog.groovy | 1 - .../nextflow/data/cid/CidHistoryFile.groovy | 144 +++++++ .../nextflow/data/cid/CidObserver.groovy | 287 +++++++++---- .../groovy/nextflow/data/cid/CidStore.groovy | 1 + .../nextflow/data/cid/DefaultCidStore.groovy | 11 +- .../nextflow/data/cid/fs/CidFileSystem.groovy | 130 ++++++ .../data/cid/fs/CidFileSystemProvider.groovy | 320 +++++++++++++++ .../nextflow/data/cid/fs/CidPath.groovy | 381 ++++++++++++++++++ .../data/cid/fs/CidPathFactory.groovy | 61 +++ .../nextflow/data/cid/model/DataPath.groovy | 33 ++ .../nextflow/data/cid/model/DataType.groovy | 3 +- .../nextflow/data/cid/model/Parameter.groovy | 34 ++ .../nextflow/data/cid/model/TaskRun.groovy | 7 +- .../nextflow/data/cid/model/Workflow.groovy | 5 +- .../data/cid/model/WorkflowResults.groovy | 34 ++ .../data/cid/model/WorkflowRun.groovy | 3 +- .../nextflow/data/config/DataConfig.groovy | 8 +- .../nextflow/processor/PublishDir.groovy | 14 +- .../nextflow/script/ScriptRunner.groovy | 3 +- .../trace/DefaultObserverFactory.groovy | 2 +- .../groovy/nextflow/util/HistoryFile.groovy | 93 +---- .../groovy/nextflow/util/WithLockFile.groovy | 78 ++++ .../java.nio.file.spi.FileSystemProvider | 17 + .../groovy/nextflow/cli/CmdCidTest.groovy | 258 ++++++++++++ .../groovy/nextflow/cli/CmdLogTest.groovy | 4 +- .../data/cid/CidHistoryFileTest.groovy | 158 ++++++++ .../nextflow/data/cid/CidObserverTest.groovy | 263 +++++++++++- .../cid/fs/CidFileSystemProviderTest.groovy | 372 
+++++++++++++++++ .../nextflow/data/cid/fs/CidPathTest.groovy | 280 +++++++++++++ .../data/cid/fs/CifPathFactoryTest.groovy | 88 ++++ .../nextflow/util/HistoryFileTest.groovy | 76 ++-- .../src/main/nextflow/file/FileHelper.groovy | 2 + 34 files changed, 3036 insertions(+), 305 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPathFactory.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataPath.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/Parameter.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowResults.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy create mode 100644 modules/nextflow/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider create mode 100644 modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 78a7e9293b..a7e7f79565 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ 
b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -19,7 +19,6 @@ package nextflow import nextflow.data.cid.CidStore import nextflow.data.cid.DefaultCidStore import nextflow.data.config.DataConfig -import nextflow.util.CacheHelper import java.nio.file.Files import java.nio.file.Path @@ -263,14 +262,10 @@ class Session implements ISession { boolean getCidEnabled() { cidEnabled } - private HashCode executionHash - private CidStore cidStore CidStore getCidStore() { cidStore } - String getExecutionHash() { executionHash } - private WorkflowMetadata workflowMetadata private WorkflowStatsObserver statsObserver @@ -410,14 +405,19 @@ class Session implements ISession { // -- file porter config this.filePorter = new FilePorter(this) - if (config.cid) { + if(config.navigate('workflow.data')) { this.cidEnabled = true - this.cidStore = new DefaultCidStore() - this.cidStore.open(DataConfig.create(this)) + this.cidStore = createCidStore(this) } } + protected static CidStore createCidStore(Session session){ + final store = new DefaultCidStore() + store.open(DataConfig.create(session)) + return store + } + protected Path cloudCachePath(Map cloudcache, Path workDir) { if( !cloudcache?.enabled ) return null @@ -428,32 +428,12 @@ class Session implements ISession { } return result } - private HashCode generateExecutionHash(ScriptFile scriptFile){ - List keys = [generateScriptHash(scriptFile).toString(), scriptFile?.repository, scriptFile?.commitId, uniqueId, (Map)config.params] - return CacheHelper.hasher(keys).hash() - } - - private HashCode generateScriptHash(ScriptFile scriptFile){ - List keys = [ scriptFile?.scriptId ] - for( Path p : ScriptMeta.allScriptNames().values() ){ - keys << CacheHelper.hasher(p.text).hash().toString() - } - return CacheHelper.hasher(keys).hash() - } /** * Initialize the session workDir, libDir, baseDir and scriptName variables */ Session init( ScriptFile scriptFile, List args=null ) { - if(cidEnabled) { - this.executionHash = 
generateExecutionHash(scriptFile) - this.outputDir = cidStore.getPath().resolve(executionHash.toString()) - log.warn("CID store enabled. Defined output directory will be ignored and set to ${outputDir}.") - if( !HistoryFile.disabled() && HistoryFile.DEFAULT.exists() ) { - HistoryFile.DEFAULT.updateCidHash(runName,executionHash.toString()) - } - } if(!workDir.mkdirs()) throw new AbortOperationException("Cannot create work-dir: $workDir -- Make sure you have write permissions or specify a different directory by using the `-w` command line option") log.debug "Work-dir: ${workDir.toUriString()} [${FileHelper.getPathFsType(workDir)}]" diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index a27bfdfec7..3b17ca35b8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -24,17 +24,18 @@ import groovy.transform.CompileStatic import nextflow.Session import nextflow.config.ConfigBuilder import nextflow.dag.MermaidHtmlRenderer +import nextflow.data.cid.CidHistoryFile import nextflow.data.cid.CidStore -import nextflow.data.cid.DefaultCidStore import nextflow.data.cid.model.DataType -import nextflow.data.config.DataConfig import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins +import nextflow.ui.TableBuilder import java.nio.file.Path import java.nio.file.Paths -import static nextflow.data.cid.CidObserver.* +import static nextflow.data.cid.fs.CidPath.CID_PROT +import static nextflow.data.cid.fs.CidPath.METADATA_FILE /** * @@ -54,9 +55,11 @@ class CmdCid extends CmdBase { private List commands = new ArrayList<>() CmdCid() { + commands << new CmdLog() commands << new CmdShow() commands << new CmdLineage() + } @Parameter(hidden = true) @@ -92,6 +95,51 @@ class CmdCid extends CmdBase { throw new AbortOperationException(msg) } + class CmdLog implements SubCmd { + + @Override + String 
getName() { + return 'log' + } + + @Override + void apply(List args) { + if (args.size() != 0) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + final config = new ConfigBuilder() + .setOptions(getLauncher().getOptions()) + .setBaseDir(Paths.get('.')) + .build() + final session = new Session(config) + printHistory(session.cidStore) + + } + + private void printHistory(CidStore store) { + + + final historyFile = store.getHistoryFile() + if (historyFile.exists()) { + def table = new TableBuilder(cellSeparator: '\t') + .head('TIMESTAMP') + .head('RUN NAME') + .head('SESSION ID') + .head('RUN CID') + historyFile.eachLine { table.append(CidHistoryFile.CidRecord.parse(it).toList()) } + println table.toString() + } else { + println("No workflow runs CIDs found.") + } + } + + @Override + void usage() { + println 'Usage: nextflow cid log' + } + } class CmdShow implements SubCmd{ @Override @@ -106,13 +154,20 @@ class CmdCid extends CmdBase { usage() return } + if (!args[0].startsWith(CID_PROT)) + throw new Exception("Identifier is not a CID URL") + final key = args[0].substring(CID_PROT.size()) + "/$METADATA_FILE" final config = new ConfigBuilder() .setOptions(getLauncher().getOptions()) .setBaseDir(Paths.get('.')) .build() final session = new Session(config) final store = session.cidStore - println store.load("${args[0]}/$METADATA_FILE").toString() + try { + println store.load(key).toString() + }catch (Throwable e){ + println "Error loading ${args[0]}." + } } @Override @@ -154,7 +209,7 @@ class CmdCid extends CmdBase { file.text = template.replace('REPLACE_WITH_NETWORK_DATA', network) println("Linage graph for ${args[0]} rendered in ${args[1]}") } catch (Throwable e) { - println("ERROR: rendering lineage graph. ${e.getLocalizedMessage()}") + println("ERROR: rendering lineage graph. 
${e.message}") } } @@ -176,8 +231,11 @@ class CmdCid extends CmdBase { } private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, CidStore store) { + if (!nodeToRender.startsWith(CID_PROT)) + throw new Exception("Identifier is not a CID URL") final slurper = new JsonSlurper() - final cidObject = slurper.parse(store.load("$nodeToRender/$METADATA_FILE").toString().toCharArray()) as Map + final key = nodeToRender.substring(CID_PROT.size()) + "/$METADATA_FILE" + final cidObject = slurper.parse(store.load(key).toString().toCharArray()) as Map switch (DataType.valueOf(cidObject.type as String)) { case DataType.TaskOutput: case DataType.WorkflowOutput: @@ -185,11 +243,11 @@ class CmdCid extends CmdBase { final source = cidObject.source as String if (source) { if (source.startsWith(CID_PROT)) { - final cid = source.substring(CID_PROT.size()) - nodes.add(cid) - edges.add(new Edge(cid, nodeToRender)) + nodes.add(source) + edges.add(new Edge(source, nodeToRender)) } else { - lines << " ${source}@{shape: document, label: \"${source}\"}".toString(); + final label = convertToLabel(source) + lines << " ${source}@{shape: document, label: \"${label}\"}".toString(); edges.add(new Edge(source, nodeToRender)) } } @@ -197,23 +255,23 @@ class CmdCid extends CmdBase { break; case DataType.WorkflowRun: lines << "${nodeToRender}@{shape: processes, label: \"${cidObject.runName}\"}".toString() - final parameters = cidObject.params as Map - parameters.values().each { - lines << " ${it}@{shape: document, label: \"${it}\"}".toString(); - edges.add(new Edge(it.toString(), nodeToRender)) + final parameters = cidObject.params as List + parameters.each { + final label = convertToLabel(it.value.toString()) + lines << " ${it.value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(it.value.toString(), nodeToRender)) } break; case DataType.TaskRun: lines << " ${nodeToRender}@{shape: process, label: 
\"${cidObject.name}\"}".toString() - final parameters = cidObject.inputs as List - parameters.each { String source -> - if (source.startsWith(CID_PROT)) { - final cid = source.substring(CID_PROT.size()) - nodes.add(cid) - edges.add(new Edge(cid, nodeToRender)) + final parameters = cidObject.inputs as List + for (nextflow.data.cid.model.Parameter source: parameters){ + if (source.type.equals(nextflow.script.params.FileInParam.simpleName)) { + manageFileInParam(lines, nodeToRender, nodes, edges, source.value) } else { - lines << " ${source}@{shape: document, label: \"${source}\"}".toString(); - edges.add(new Edge(source, nodeToRender)) + final label = convertToLabel(source.value.toString()) + lines << " ${source.value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(source.value.toString(), nodeToRender)) } } break; @@ -222,7 +280,37 @@ class CmdCid extends CmdBase { } } - private String readTemplate() { + private String convertToLabel(String label){ + return label.replace('http', 'h\u200Ettp') + } + + private void manageFileInParam(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, value){ + if (value instanceof Collection) { + value.each { manageFileInParam(lines, nodeToRender, nodes, edges, it) } + return + } + if (value instanceof CharSequence) { + final source = value.toString() + if (source.startsWith(CID_PROT)) { + nodes.add(source) + edges.add(new Edge(source, nodeToRender)) + return + } + } + if (value instanceof Map) { + if (value.path) { + final label = convertToLabel(value.path.toString()) + lines << " ${value.path}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(value.path.toString(), nodeToRender)) + return + } + } + final label = convertToLabel(value.toString()) + lines << " ${value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(value.toString(), nodeToRender)) + } + + protected static String readTemplate() { final writer = new 
StringWriter() final res = MermaidHtmlRenderer.class.getResourceAsStream('mermaid.dag.template.html') int ch diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy index 0c6d4356f3..66d88980d1 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLog.groovy @@ -203,7 +203,6 @@ class CmdLog extends CmdBase implements CacheBase { .head('STATUS') .head('REVISION ID') .head('SESSION ID') - .head('CID HASH') .head('COMMAND') history.eachRow { List row -> diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy new file mode 100644 index 0000000000..07b4e24b16 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy @@ -0,0 +1,144 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package nextflow.data.cid + +import groovy.transform.EqualsAndHashCode +import groovy.util.logging.Slf4j +import nextflow.util.WithLockFile + +import java.nio.file.Path +import java.text.DateFormat +import java.text.SimpleDateFormat + +/** + * File to store a history of the workflow executions and their corresponding CIDs + * + * @author Jorge Ejarque + */ +@Slf4j +class CidHistoryFile extends WithLockFile { + private static final DateFormat TIMESTAMP_FMT = new SimpleDateFormat('yyyy-MM-dd HH:mm:ss') + + CidHistoryFile(Path file) { + super(file.toString()) + } + + void write(String name, UUID key, String runCid, Date date = null) { + assert key + + withFileLock { + def timestamp = date ?: new Date() + log.debug("Writting record for $key in CID history file $this") + this << new CidRecord(timestamp: timestamp, runName: name, sessionId: key, runCid: runCid).toString() << '\n' + } + } + + void update(UUID sessionId, String runCid) { + assert sessionId + + try { + withFileLock { update0(sessionId, runCid) } + } + catch (Throwable e) { + log.warn "Can't update cid history file: $this", e + } + } + + String getRunCid(UUID id){ + assert id + + for (String line: this.readLines()){ + def current = line ? CidRecord.parse(line) : null + if (current.sessionId == id) { + return current.runCid + } + } + log.warn("Can't find session $id in CID history file $this") + return null + } + + private void update0(UUID id, String runCid) { + assert id + def newHistory = new StringBuilder() + + this.readLines().each { line -> + try { + def current = line ? 
CidRecord.parse(line) : null + if (current.sessionId == id) { + log.debug("Updating record for $id in CID history file $this") + current.runCid = runCid + newHistory << current.toString() << '\n' + } else { + newHistory << line << '\n' + } + } + catch (IllegalArgumentException e) { + log.warn("Can't read CID history file: $this", e) + } + } + + // rewrite the history content + this.setText(newHistory.toString()) + } + + @EqualsAndHashCode(includes = 'runName,sessionId') + static class CidRecord { + Date timestamp + String runName + UUID sessionId + String runCid + + CidRecord(UUID sessionId, String name = null) { + this.runName = name + this.sessionId = sessionId + } + + protected CidRecord() {} + + List toList() { + def line = new ArrayList(4) + line << (timestamp ? TIMESTAMP_FMT.format(timestamp) : '-') + line << (runName ?: '-') + line << (sessionId.toString()) + line << (runCid ?: '-') + } + + @Override + String toString() { + toList().join('\t') + } + + static CidRecord parse(String line) { + def cols = line.tokenize('\t') + if (cols.size() == 2) + return new CidRecord(UUID.fromString(cols[0])) + + if (cols.size() == 4) { + + return new CidRecord( + timestamp: TIMESTAMP_FMT.parse(cols[0]), + runName: cols[1], + sessionId: UUID.fromString(cols[2]), + runCid: cols[3] + ) + } + + throw new IllegalArgumentException("Not a valid history entry: `$line`") + } + } + +} \ No newline at end of file diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index f59b582f60..fe33c95032 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -18,11 +18,19 @@ package nextflow.data.cid import groovy.util.logging.Slf4j +import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.WorkflowResults import 
nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowRun import nextflow.file.FileHelper +import nextflow.file.FileHolder import nextflow.script.ScriptMeta +import nextflow.script.params.DefaultInParam +import nextflow.script.params.FileInParam +import nextflow.script.params.InParam import nextflow.util.PathNormalizer +import nextflow.util.TestOnly import java.nio.file.Files import java.nio.file.Path @@ -40,35 +48,69 @@ import nextflow.trace.TraceObserver import nextflow.trace.TraceRecord import nextflow.util.CacheHelper +import static nextflow.data.cid.fs.CidPath.CID_PROT +import static nextflow.data.cid.fs.CidPath.METADATA_FILE /** + * Observer to write the generated workflow metadata in a CID store. * * @author Paolo Di Tommaso */ @Slf4j @CompileStatic class CidObserver implements TraceObserver { - public static final String METADATA_FILE = '.data.json' - public static final String CID_PROT = 'cid://' + + private String executionHash private CidStore store private Session session + private WorkflowResults workflowResults + private Map outputsStoreDirCid = new HashMap(10) - @Override - void onFlowCreate(Session session) { + CidObserver(Session session){ this.session = session this.store = session.cidStore } + @Override + void onFlowCreate(Session session) { + this.store.getHistoryFile().write(session.runName, session.uniqueId, '-') + } + + @TestOnly + String getExecutionHash(){ executionHash } + + @Override void onFlowBegin() { - storeWorkflowRun() + this.executionHash = storeWorkflowRun() + workflowResults = new WorkflowResults( + DataType.WorkflowResults, + "$CID_PROT${executionHash}", + new ArrayList()) + this.store.getHistoryFile().update(session.uniqueId, "${CID_PROT}${this.executionHash}") } - protected void storeWorkflowRun() { + @Override + void onFlowComplete(){ + if (this.workflowResults){ + final content = JsonOutput.prettyPrint(JsonOutput.toJson(workflowResults)) + final wfResultsHash = CacheHelper.hasher(content).hash().toString() 
+ this.store.save("${wfResultsHash}/$METADATA_FILE", content) + this.store.getHistoryFile().update(session.uniqueId, "${CID_PROT}${wfResultsHash}") + } + } + + protected String storeWorkflowRun() { final normalizer = new PathNormalizer(session.workflowMetadata) - final mainScript = normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()) + final mainScript = new DataPath( + normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()), + session.workflowMetadata.scriptId + ) final workflow = new Workflow( DataType.Workflow, mainScript, - ScriptMeta.allScriptNames().values().collect {normalizer.normalizePath(it.normalize())}, + ScriptMeta.allScriptNames().values().collect { new DataPath( + normalizer.normalizePath(it.normalize()), + CacheHelper.hasher(it.text).hash().toString()) + }, session.workflowMetadata.repository, session.workflowMetadata.commitId ) @@ -79,20 +121,22 @@ class CidObserver implements TraceObserver { session.runName, getNormalizedParams(session.params, normalizer) ) + final content = JsonOutput.prettyPrint(JsonOutput.toJson(value)) - store.save("${session.executionHash}/$METADATA_FILE", content) + final executionHash = CacheHelper.hasher(content).hash().toString() + store.save("${executionHash}/$METADATA_FILE", content) + return executionHash } - private static Map getNormalizedParams(Map params, PathNormalizer normalizer){ - final normalizedParams = new HashMap() + private static List getNormalizedParams(Map params, PathNormalizer normalizer){ + final normalizedParams = new LinkedList() params.each{String key, Object value -> - log.debug("Managing parameter $key , class ${value.class}") - if (value instanceof Path) - normalizedParams.put(key,normalizer.normalizePath(value as Path)) - else if (value instanceof String || value instanceof GString) - normalizedParams.put(key,normalizer.normalizePath(value.toString())) + if( value instanceof Path ) + normalizedParams.add( new Parameter( Path.class.simpleName, key, 
normalizer.normalizePath( value as Path ) ) ) + else if ( value instanceof CharSequence ) + normalizedParams.add( new Parameter( String.class.simpleName, key, normalizer.normalizePath( value.toString() ) ) ) else - normalizedParams.put(key, value) + normalizedParams.add( new Parameter( value.class.simpleName, key, value) ) } return normalizedParams } @@ -109,53 +153,92 @@ class CidObserver implements TraceObserver { storeTaskRun(task, pathNormalizer) // store all task outputs files final outputs = task.getOutputsByType(FileOutParam) - for( Map.Entry entry : outputs ) { - final value = entry.value - if( value instanceof Path ) { - storeTaskOutput(task, (Path)value) - } - else if( value instanceof Collection ) { - for( Path it : value ) - storeTaskOutput(task, (Path)it) + outputs.forEach { FileOutParam key, Object value -> manageFileOutParams(value, task)} + + } + + private void manageFileOutParams( Object value, TaskRun task) { + if (value instanceof Path) { + storeTaskOutput(task, (Path) value) + } else if (value instanceof Collection) { + for (Path it : value) { + storeTaskOutput(task, (Path) it) } } } - protected void storeTaskRun(TaskRun task, PathNormalizer normalizer) { + protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { final value = new nextflow.data.cid.model.TaskRun( DataType.TaskRun, session.uniqueId.toString(), task.getName(), - session.stubRun ? task.stubSource: task.source, - task.inputFilesMap ? convertToReferences(task.inputFilesMap, normalizer): null, + CacheHelper.hasher(session.stubRun ? task.stubSource: task.source).hash().toString(), + task.inputs ? manageInputs(task.inputs, normalizer): null, task.isContainerEnabled() ? 
task.getContainerFingerprint(): null, normalizer.normalizePath(task.getCondaEnv()), normalizer.normalizePath(task.getSpackEnv()), task.config?.getArchitecture()?.toString(), task.processor.getTaskGlobalVars(task), - task.processor.getTaskBinEntries(task.source).collect { Path p -> normalizer.normalizePath(p.normalize()) } - ) + task.processor.getTaskBinEntries(task.source).collect { Path p -> new DataPath(normalizer.normalizePath(p.normalize()), + CacheHelper.hasher(p).hash().toString() )} + ) + // store in the underlying persistence final key = "${task.hash}/$METADATA_FILE" store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + return task.hash.toString() } protected void storeTaskOutput(TaskRun task, Path path) { - final attrs = readAttributes(path) - final rel = task.workDir.relativize(path).toString() - final cid = "${task.hash}/${rel}" - final key = "${cid}/$METADATA_FILE" - final hash = CacheHelper.hasher(path).hash().toString() - final value = new Output( - DataType.TaskOutput, - path.toString(), - hash, - "$CID_PROT$task.hash", - attrs.size(), - attrs.creationTime().toMillis(), - attrs.lastModifiedTime().toMillis() ) - // store in the underlying persistence - store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + try { + final attrs = readAttributes(path) + final rel = getTaskRelative(task, path) + final cid = "${task.hash}/${rel}" + final key = "${cid}/$METADATA_FILE" + final hash = CacheHelper.hasher(path).hash().toString() + final value = new Output( + DataType.TaskOutput, + path.toString(), + hash, + "$CID_PROT$task.hash", + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis()) + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + } catch (Throwable e) { + log.warn("Exception storing CID output $path for task ${task.name}. 
${e.getLocalizedMessage()}") + } + } + + protected String getTaskRelative(TaskRun task, Path path){ + if (path.isAbsolute()) { + final rel = getTaskRelative0(task, path) + if (rel) return rel + throw new Exception("Cannot asses the relative path for output $path of ${task.name}") + } else { + //Check if contains workdir or storeDir + final rel = getTaskRelative0(task, path.toAbsolutePath()) + if (rel) return rel + if (path.normalize().getName(0).toString() == "..") + throw new Exception("Cannot asses the relative path for output $path of ${task.name}" ) + return path.normalize().toString() + } + + } + + private String getTaskRelative0(TaskRun task, Path path){ + final workDirAbsolute = task.workDir.toAbsolutePath() + if (path.startsWith(workDirAbsolute)) { + return workDirAbsolute.relativize(path).toString() + } + //If task output is not in the workDir check if output is stored in the task's storeDir + final storeDir = task.getConfig().getStoreDir().toAbsolutePath() + if( storeDir && path.startsWith(storeDir)) { + final rel = storeDir.relativize(path) + //If output stored in storeDir, keep the path in case it is used as workflow output + this.outputsStoreDirCid.put(path.toString(), "$CID_PROT${task.hash}/$rel".toString()) + return rel + } } protected BasicFileAttributes readAttributes(Path path) { @@ -164,20 +247,25 @@ class CidObserver implements TraceObserver { @Override void onFilePublish(Path destination, Path source){ - final hash = CacheHelper.hasher(destination).hash().toString() - final rel = session.outputDir.relativize(destination).toString() - final key = "$session.executionHash/${rel}/$METADATA_FILE" - final sourceReference = getSourceReference(source) - final attrs = readAttributes(destination) - final value = new Output( - DataType.WorkflowOutput, - destination.toString(), - hash, - sourceReference, - attrs.size(), - attrs.creationTime().toMillis(), - attrs.lastModifiedTime().toMillis() ) - store.save(key, 
JsonOutput.prettyPrint(JsonOutput.toJson(value))) + try { + final hash = CacheHelper.hasher(destination).hash().toString() + final rel = getWorkflowRelative(destination) + final key = "$executionHash/${rel}/$METADATA_FILE" + final sourceReference = getSourceReference(source) + final attrs = readAttributes(destination) + final value = new Output( + DataType.WorkflowOutput, + destination.toString(), + hash, + sourceReference, + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis()) + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + workflowResults.outputs.add("${CID_PROT}${executionHash}/${rel}") + } catch (Throwable e) { + log.warn("Exception storing CID output $destination for workflow ${executionHash}.", e) + } } String getSourceReference(Path source){ @@ -185,32 +273,75 @@ class CidObserver implements TraceObserver { if (hash) { final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() return "$CID_PROT$hash/$target" + } else { + final storeDirReference = outputsStoreDirCid.get(source.toString()) + if (storeDirReference) + return "$CID_PROT$storeDirReference" } return null } @Override void onFilePublish(Path destination){ - final hash = CacheHelper.hasher(destination).hash().toString() - final rel = session.outputDir.relativize(destination).toString() - final key = "$session.executionHash/${rel}/$METADATA_FILE" - final attrs = readAttributes(destination) - final value = new Output( - DataType.WorkflowOutput, - destination.toString(), - hash, - session.executionHash, - attrs.size(), - attrs.creationTime().toMillis(), - attrs.lastModifiedTime().toMillis() ) - store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + try { + final hash = CacheHelper.hasher(destination).hash().toString() + final rel = getWorkflowRelative(destination) + final key = "$executionHash/${rel}/$METADATA_FILE" + final attrs = readAttributes(destination) + final value = new Output( + 
DataType.WorkflowOutput, + destination.toString(), + hash, + "${CID_PROT}${executionHash}".toString(), + attrs.size(), + attrs.creationTime().toMillis(), + attrs.lastModifiedTime().toMillis()) + store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + workflowResults.outputs.add("${CID_PROT}${executionHash}/${rel}") + }catch (Throwable e) { + log.warn("Exception storing CID output $destination for workflow ${executionHash}. ${e.getLocalizedMessage()}") + } } - protected List convertToReferences(Map inputs, PathNormalizer normalizer) { - List references = new LinkedList() - inputs.each { name, path -> - final ref = getSourceReference(path) - references.add(ref ? ref : normalizer.normalizePath(path))} - return references + protected String getWorkflowRelative(Path path){ + final outputDirAbs = session.outputDir.toAbsolutePath() + if (path.isAbsolute()) { + if (path.startsWith(outputDirAbs)) { + return outputDirAbs.relativize(path).toString() + } else { + throw new Exception("Cannot asses the relative path for workflow output $path") + } + } else { + final pathAbs = path.toAbsolutePath() + if (pathAbs.startsWith(outputDirAbs)) { + return outputDirAbs.relativize(pathAbs).toString() + } + if (path.normalize().getName(0).toString() == "..") + throw new Exception("Cannot asses the relative path for workflow output $path") + return path.normalize().toString() + } + + } + + protected List manageInputs(Map inputs, PathNormalizer normalizer) { + List managedInputs = new LinkedList() + inputs.forEach{ param, value -> + final type = param.class.simpleName + final name = param.name + if( param instanceof FileInParam ) + managedInputs.add( new Parameter( type, name, manageFileInParam( (List)value , normalizer) ) ) + else if( !(param instanceof DefaultInParam) ) + managedInputs.add( new Parameter( type, name, value) ) + } + return managedInputs + } + + private List manageFileInParam(List files, PathNormalizer normalizer){ + final paths = new LinkedList(); + for( 
FileHolder it : files ) { + final ref = getSourceReference(it.storePath) + paths.add(ref ? ref : new DataPath(normalizer.normalizePath(it.storePath), CacheHelper.hasher(it.storePath).hash().toString())) + } + return paths } } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy index 67017f0bf0..f012c8f130 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy @@ -39,5 +39,6 @@ interface CidStore { Path getPath() + CidHistoryFile getHistoryFile() } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy index 958cc4ef49..9f35052861 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -27,6 +27,7 @@ import nextflow.data.config.DataConfig import nextflow.exception.AbortOperationException /** + * Default Implementation for the a CID store. 
* * @author Paolo Di Tommaso */ @@ -34,12 +35,13 @@ import nextflow.exception.AbortOperationException @CompileStatic class DefaultCidStore implements CidStore { + private static String HISTORY_FILE_NAME =".history" private Path metaLocation private Path location void open(DataConfig config) { location = config.store.location - metaLocation = location.resolve('.meta') + metaLocation = getMetadataPath(config) if( !Files.exists(metaLocation) && !Files.createDirectories(metaLocation) ) { throw new AbortOperationException("Unable to create CID store directory: $metaLocation") } @@ -69,5 +71,12 @@ class DefaultCidStore implements CidStore { @Override Path getPath(){ location } + @Override + CidHistoryFile getHistoryFile(){ + return new CidHistoryFile(metaLocation.resolve(HISTORY_FILE_NAME)) + } + + static Path getMetadataPath(DataConfig config){ config.store.location.resolve('.meta') } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy new file mode 100644 index 0000000000..d6105624f7 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy @@ -0,0 +1,130 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.fs + +import nextflow.data.cid.DefaultCidStore + +import java.nio.file.FileStore +import java.nio.file.FileSystem +import java.nio.file.Path +import java.nio.file.PathMatcher +import java.nio.file.WatchService +import java.nio.file.attribute.UserPrincipalLookupService +import java.nio.file.spi.FileSystemProvider + +import groovy.transform.CompileStatic +import nextflow.data.config.DataConfig + +/** + * File system for CID Paths + * + * @author Jorge Ejarque + */ +@CompileStatic +class CidFileSystem extends FileSystem { + + private CidFileSystemProvider provider + + private Path basePath + + /* + * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 + */ + protected CidFileSystem(){} + + CidFileSystem(CidFileSystemProvider provider, DataConfig config) { + this.provider = provider + this.basePath = DefaultCidStore.getMetadataPath(config) + } + + Path getBasePath() { + return basePath + } + + @Override + boolean equals( Object other ) { + if( this.class != other.class ) return false + final that = (CidFileSystem)other + this.provider == that.provider && this.basePath == that.basePath + } + + @Override + int hashCode() { + Objects.hash(provider,basePath) + } + + @Override + FileSystemProvider provider() { + return provider + } + + @Override + void close() throws IOException { + + } + + @Override + boolean isOpen() { + return false + } + + @Override + boolean isReadOnly() { + return true + } + + @Override + String getSeparator() { + return CidPath.SEPARATOR + } + + @Override + Iterable getRootDirectories() { + return null + } + + @Override + Iterable getFileStores() { + return null + } + + @Override + Set supportedFileAttributeViews() { + return null + } + + @Override + Path getPath(String first, String... 
more) { + return new CidPath(this,first,more) + } + + @Override + PathMatcher getPathMatcher(String syntaxAndPattern) { + throw new UnsupportedOperationException(); + } + + @Override + UserPrincipalLookupService getUserPrincipalLookupService() { + throw new UnsupportedOperationException('User Principal Lookup Service not supported') + } + + @Override + WatchService newWatchService() throws IOException { + throw new UnsupportedOperationException('Watch Service not supported') + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy new file mode 100644 index 0000000000..a963c7dd61 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -0,0 +1,320 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.fs + +import java.nio.ByteBuffer +import java.nio.channels.SeekableByteChannel +import java.nio.file.AccessDeniedException +import java.nio.file.AccessMode +import java.nio.file.CopyOption +import java.nio.file.DirectoryStream +import java.nio.file.FileStore +import java.nio.file.FileSystem +import java.nio.file.FileSystemNotFoundException +import java.nio.file.LinkOption +import java.nio.file.OpenOption +import java.nio.file.Path +import java.nio.file.ProviderMismatchException +import java.nio.file.StandardOpenOption +import java.nio.file.attribute.BasicFileAttributes +import java.nio.file.attribute.FileAttribute +import java.nio.file.attribute.FileAttributeView +import java.nio.file.spi.FileSystemProvider + +import groovy.transform.CompileStatic +import nextflow.data.config.DataConfig + +/** + * File System Provider for CID Paths + * + * @author Jorge Ejarque + */ +@CompileStatic +class CidFileSystemProvider extends FileSystemProvider { + + public static final String SCHEME = "cid" + + private CidFileSystem fileSystem + + @Override + String getScheme() { + return SCHEME + } + + protected CidPath toCidPath(Path path) { + if (path !instanceof CidPath) + throw new ProviderMismatchException() + return (CidPath) path + } + + private void checkScheme(URI uri) { + final scheme = uri.scheme.toLowerCase() + if( scheme != getScheme() ) + throw new IllegalArgumentException("Not a valid ${getScheme().toUpperCase()} scheme: $scheme") + } + + @Override + synchronized FileSystem newFileSystem(URI uri, Map config) throws IOException { + checkScheme(uri) + if( !fileSystem ) { + //Overwrite default values with provided configuration + final defaultConfig = DataConfig.asMap() + config.each {defaultConfig.put(it.key, it.value)} + fileSystem = new CidFileSystem(this, new DataConfig(defaultConfig)) + } + return fileSystem + } + + @Override + FileSystem getFileSystem(URI uri) throws FileSystemNotFoundException { + if (!fileSystem) + throw new 
FileSystemNotFoundException() + return fileSystem + } + + synchronized FileSystem getFileSystemOrCreate(URI uri) { + checkScheme(uri) + if( !fileSystem ) { + fileSystem = (CidFileSystem) newFileSystem(uri, DataConfig.asMap()) + } + return fileSystem + } + + @Override + CidPath getPath(URI uri) { + // the URI authority holds the base component of the CID path + final base = uri.authority + final path = uri.path + return (CidPath) getFileSystemOrCreate(uri).getPath(base, path) + } + + @Override + OutputStream newOutputStream(Path path, OpenOption... options) throws IOException { + throw new UnsupportedOperationException("Write not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + InputStream newInputStream(Path path, OpenOption... options) throws IOException { + final cid = toCidPath(path) + final realPath = cid.getTargetPath() + realPath.fileSystem.provider().newInputStream(realPath, options) + } + + @Override + SeekableByteChannel newByteChannel(Path path, Set options, FileAttribute... 
attrs) throws IOException { + final cid = toCidPath(path) + if (options.size() > 0) { + for (OpenOption opt: options) { + // All OpenOption values except for APPEND and WRITE are allowed + if (opt == StandardOpenOption.APPEND || opt == StandardOpenOption.WRITE) + throw new UnsupportedOperationException("'$opt' not allowed"); + } + } + final realPath = cid.getTargetPath() + final channel = realPath.fileSystem.provider().newByteChannel(realPath, options, attrs) + + new SeekableByteChannel() { + + @Override + int read(ByteBuffer dst) throws IOException { + channel.read(dst) + } + + @Override + int write(ByteBuffer src) throws IOException { + throw new UnsupportedOperationException("Write operation not supported") + } + + @Override + long position() throws IOException { + channel.position() + } + + @Override + SeekableByteChannel position(long newPosition) throws IOException { + throw new UnsupportedOperationException("Position operation not supported") + } + + @Override + long size() throws IOException { + channel.size() + } + + @Override + SeekableByteChannel truncate(long unused) throws IOException { + throw new UnsupportedOperationException("Truncate operation not supported") + } + + @Override + boolean isOpen() { + channel.isOpen() + } + + @Override + void close() throws IOException { + channel.close() + } + } + } + + @Override + DirectoryStream newDirectoryStream(Path path, DirectoryStream.Filter filter) throws IOException { + final cid = toCidPath(path) + final real = cid.getTargetPath() + final stream = real + .getFileSystem() + .provider() + .newDirectoryStream(real, new CidFilter(fileSystem)) + + return new DirectoryStream() { + + @Override + Iterator iterator() { + return new CidIterator(fileSystem, stream.iterator(), cid, real) + } + + @Override + void close() throws IOException { + stream.close() + } + } + } + private class CidFilter implements DirectoryStream.Filter { + + private final CidFileSystem fs + + CidFilter(CidFileSystem fs){ + this.fs = fs + } + 
+ @Override + boolean accept(Path entry) throws IOException { + if( entry.startsWith(fs.getBasePath()) && entry.getFileName().toString() == CidPath.METADATA_FILE ) { + return false + } + return true + } + } + + private static CidPath fromRealToCidPath(Path toConvert, Path realBase, CidPath cidBase){ + final fs = cidBase.fileSystem as CidFileSystem + if (toConvert.startsWith(fs.basePath)) { + return new CidPath(fs, toConvert) + } else { + final relative = realBase.relativize(toConvert) + return (CidPath) cidBase.resolve(relative.toString()) + } + } + + private static class CidIterator implements Iterator { + + private final CidFileSystem fs + private final Iterator target + private final CidPath parent + private final Path parentReal + + CidIterator(CidFileSystem fs, Iterator itr, CidPath parent, Path real) { + this.fs = fs + this.target = itr + this.parent = parent + this.parentReal = real + } + + @Override + boolean hasNext() { + return target.hasNext() + } + + @Override + CidPath next() { + final path = target.next() + return path ? fromRealToCidPath(path, parentReal, parent) : null + } + } + + @Override + void createDirectory(Path dir, FileAttribute... attrs) throws IOException { + throw new UnsupportedOperationException("Create directory not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + void delete(Path path) throws IOException { + throw new UnsupportedOperationException("Delete not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + void copy(Path source, Path target, CopyOption... options) throws IOException { + throw new UnsupportedOperationException("Copy not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + void move(Path source, Path target, CopyOption... 
options) throws IOException { + throw new UnsupportedOperationException("Move not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + boolean isSameFile(Path path, Path path2) throws IOException { + return path == path2 + } + + @Override + boolean isHidden(Path path) throws IOException { + return toCidPath(path).getTargetPath().isHidden() + } + + @Override + FileStore getFileStore(Path path) throws IOException { + throw new UnsupportedOperationException("File store not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + void checkAccess(Path path, AccessMode... modes) throws IOException { + final cid = toCidPath(path) + for( AccessMode m : modes ) { + if( m == AccessMode.WRITE ) + throw new AccessDeniedException("Write mode not supported") + if( m == AccessMode.EXECUTE ) + throw new AccessDeniedException("Execute mode not supported") + } + final real = cid.getTargetPath() + real.fileSystem.provider().checkAccess(real, modes) + } + + @Override + V getFileAttributeView(Path path, Class type, LinkOption... options) { + return null + } + + @Override + A readAttributes(Path path, Class type, LinkOption... options) throws IOException { + final cid = toCidPath(path) + final real = cid.getTargetPath() + real.fileSystem.provider().readAttributes(real,type,options) + } + + @Override + Map readAttributes(Path path, String attributes, LinkOption... options) throws IOException { + throw new UnsupportedOperationException("Read file attributes not supported by ${getScheme().toUpperCase()} file system provider") + } + + @Override + void setAttribute(Path path, String attribute, Object value, LinkOption... 
options) throws IOException { + throw new UnsupportedOperationException("Set file attributes not supported by ${getScheme().toUpperCase()} file system provider") + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy new file mode 100644 index 0000000000..4d70f5252c --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy @@ -0,0 +1,381 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.fs + +import groovy.json.JsonSlurper +import groovy.util.logging.Slf4j +import nextflow.data.cid.model.DataType +import nextflow.util.CacheHelper +import nextflow.util.TestOnly + +import static nextflow.data.cid.fs.CidFileSystemProvider.* + +import java.nio.file.FileSystem +import java.nio.file.LinkOption +import java.nio.file.Path +import java.nio.file.ProviderMismatchException +import java.nio.file.WatchEvent +import java.nio.file.WatchKey +import java.nio.file.WatchService + +import groovy.transform.CompileStatic +import nextflow.file.FileHelper + +/** + * CID file system path + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +class CidPath implements Path { + + static public String SEPARATOR = '/' + public static final String METADATA_FILE = '.data.json' + public static final String CID_PROT = "${SCHEME}://".toString() + + static private String[] EMPTY = new String[] {} + + private CidFileSystem fileSystem + + // Path of the file in the metadata cid store + private Path storePath + + // String with the cid file path + private String filePath + + /* + * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 + */ + protected CidPath(){} + + protected CidPath(CidFileSystem fs, Path target) { + this.fileSystem = fs + this.storePath = target + this.filePath = filePath0(fs, target) + } + + CidPath(CidFileSystem fs, String path) { + this(fs, path, EMPTY) + } + + CidPath(CidFileSystem fs, String path, String[] more) { + this.fileSystem = fs + this.storePath = resolve0(fs, norm0(path), norm0(more)) + this.filePath = filePath0(fs, storePath) + } + + @TestOnly + protected String getFilePath(){ this.filePath } + + @TestOnly + protected Path getStorePath(){ this.storePath } + + + /** + * Finds the target path of a CID path + **/ + protected static Path findTarget(Path cidStorePath, CidFileSystem fs, String[] childs=[]){ + assert fs + if( fs.basePath == cidStorePath ) + return null 
+ final metadata = cidStorePath.resolve(METADATA_FILE).toFile() + if ( metadata.exists() ){ + final slurper = new JsonSlurper() + final cidObject = slurper.parse(metadata.text.toCharArray()) as Map + final type = DataType.valueOf(cidObject.type as String) + if( type == DataType.TaskOutput || type == DataType.WorkflowOutput ) { + // return the real path stored in the metadata + final realPath = Path.of(cidObject.path as String, childs) + if( !realPath.exists() ) + throw new FileNotFoundException("Target path $realPath for $cidStorePath does not exists.") + if( cidObject.checksum && CacheHelper.hasher(realPath).hash().toString() != cidObject.checksum ) { + log.warn("Checksum of $cidStorePath does not match with the one stored in the metadata") + } + return realPath + } + } else { + // If there isn't metadata check the parent to check if it is a subfolder of a task/workflow output + final parent = cidStorePath.getParent() + if( parent) { + ArrayList newChilds = new ArrayList() + newChilds.add(cidStorePath.getFileName().toString()) + newChilds.addAll(childs) + return findTarget(parent, fs, newChilds as String[]) + } + } + return null + } + + private static String filePath0(CidFileSystem fs, Path target) { + if( !fs ) + return target.toString() + return fs.basePath != target + ? fs.basePath.relativize(target).toString() + : SEPARATOR + } + + private static Path resolve0(CidFileSystem fs, String base, String[] more) { + if( !base || base == SEPARATOR ) { + return resolveEmptyPathCase(fs, more as List) + } + if( base.contains(SEPARATOR) ) { + final parts = base.tokenize(SEPARATOR) + final remain = parts[1..-1] + more.toList() + return resolve0(fs, parts[0], remain as String[]) + } + final result = fs ? fs.basePath.resolve(base) : Path.of(base) + return more + ? result.resolve(more.join(SEPARATOR)) + : result + } + + private static Path resolveEmptyPathCase(CidFileSystem fs, List more ){ + switch(more.size()) { + case 0: + return fs ? 
fs.basePath : Path.of("/") + case 1: + return resolve0(fs, more[0], EMPTY) + default: + return resolve0(fs, more[0], more[1..-1] as String[]) + } + + } + + static private String norm0(String path) { + if( !path ) + return "" + if( path==SEPARATOR ) + return path + //Remove repeated elements + path = Path.of(path).normalize().toString() + //Remove initial and final separators + if( path.startsWith(SEPARATOR) ) + path = path.substring(1) + if( path.endsWith(SEPARATOR) ) + path = path.substring(0,path.size()-1) + return path + } + + static private String[] norm0(String... path) { + for( int i=0; i1 ) + return subpath(0,c-1) + if( c==1 ) + return new CidPath(fileSystem,"/") + return null + } + + @Override + int getNameCount() { + return fileSystem ? storePath.nameCount-fileSystem.basePath.nameCount : storePath.nameCount + } + + @Override + Path getName(int index) { + if( index<0 ) + throw new IllegalArgumentException("Path name index cannot be less than zero - offending value: $index") + final c= fileSystem.basePath.nameCount + return new CidPath(index==0 ? fileSystem : null, storePath.getName(c + index).toString()) + } + + @Override + Path subpath(int beginIndex, int endIndex) { + if( beginIndex<0 ) + throw new IllegalArgumentException("subpath begin index cannot be less than zero - offending value: $beginIndex") + final c= fileSystem.basePath.nameCount + return new CidPath(beginIndex==0 ? 
fileSystem : null, storePath.subpath(c+beginIndex, c+endIndex).toString()) + } + + @Override + Path normalize() { + return new CidPath(fileSystem, storePath.normalize()) + } + + @Override + boolean startsWith(Path other) { + return startsWith(other.toString()) + } + + @Override + boolean startsWith(String other) { + return storePath.startsWith(fileSystem.basePath.resolve(other)) + } + + @Override + boolean endsWith(Path other) { + return endsWith(other.toString()) + } + + @Override + boolean endsWith(String other) { + return storePath.endsWith(other) + } + + @Override + Path resolve(Path other) { + if( CidPath.class != other.class ) + throw new ProviderMismatchException() + + final that = (CidPath)other + + if( that.fileSystem && this.fileSystem != that.fileSystem ) + return other + if( that.isAbsolute() ) { + return that + } + if( that.storePath ) { + final newPath = this.storePath.resolve(that.storePath) + return new CidPath(fileSystem, newPath) + } + return this + } + + @Override + Path resolve(String path) { + if( !path ) + return this + final scheme = FileHelper.getUrlProtocol(path) + if( !scheme ) { + // consider the path as a cid relative path + return resolve(new CidPath(null,path)) + } + if( scheme != SCHEME ) { + throw new ProviderMismatchException() + } + final that = fileSystem.provider().getPath(asUri(path)) + return resolve(that) + } + + + @Override + Path relativize(Path other) { + if( CidPath.class != other.class ) { + throw new ProviderMismatchException() + } + final path = storePath.relativize(((CidPath) other).storePath) + return new CidPath(null , path.getNameCount()>0 ? path.toString(): SEPARATOR) + } + + @Override + URI toUri() { + asUri("${SCHEME}://${filePath}") + } + + String toUriString() { + return toUri().toString() + } + + @Override + Path toAbsolutePath() { + return this + } + + @Override + Path toRealPath(LinkOption... 
options) throws IOException { + return getTargetPath() + } + + protected Path getTargetPath(){ + final target = findTarget(storePath, fileSystem) + return target ? target : storePath + } + + @Override + File toFile() throws IOException { + throw new UnsupportedOperationException("toFile not supported by CidPath") + } + + @Override + WatchKey register(WatchService watcher, WatchEvent.Kind[] events, WatchEvent.Modifier... modifiers) throws IOException { + throw new UnsupportedOperationException("Register not supported by CidPath") + } + + @Override + int compareTo(Path other) { + if( CidPath.class != other.class ) + throw new ProviderMismatchException() + final that = other as CidPath + return this.storePath.compareTo(that.storePath) + } + + @Override + boolean equals(Object other) { + if( CidPath.class != other.class ) { + return false + } + final that = (CidPath)other + return this.fileSystem == that.fileSystem && this.storePath.equals(that.storePath) + } + + /** + * @return The unique hash code for this path + */ + @Override + int hashCode() { + return Objects.hash(fileSystem,storePath) + } + + static URI asUri(String path) { + if (!path) + throw new IllegalArgumentException("Missing 'path' argument") + if (!path.startsWith(CID_PROT)) + throw new IllegalArgumentException("Invalid CID file system path URI - it must start with '${CID_PROT}' prefix - offendinf value: $path") + if (path.startsWith(CID_PROT + SEPARATOR) && path.length() > 7) + throw new IllegalArgumentException("Invalid CID file system path URI - make sure the schema prefix does not container more than two slash characters - offending value: $path") + if (path == CID_PROT) //Empty path case + return new URI("") + return new URI(path) + } + + @Override + String toString() { + filePath + } + + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPathFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPathFactory.groovy new file mode 100644 index 
0000000000..a7a365a6f7 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPathFactory.groovy @@ -0,0 +1,61 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.fs + +import java.nio.file.Path + +import groovy.transform.CompileStatic +import nextflow.data.config.DataConfig +import nextflow.file.FileHelper +import nextflow.file.FileSystemPathFactory + +import static nextflow.data.cid.fs.CidPath.CID_PROT + +/** + * Implements a {@link FileSystemPathFactory} for CID file system + * + * @author Jorge Ejarque + */ +@CompileStatic +class CidPathFactory extends FileSystemPathFactory { + + @Override + protected Path parseUri(String uri) { + return uri.startsWith(CID_PROT) ? create(uri) : null + } + + @Override + protected String toUriString(Path path) { + return path instanceof CidPath ? 
((CidPath)path).toUriString() : null + } + + @Override + protected String getBashLib(Path target) { + return null + } + + @Override + protected String getUploadCmd(String source, Path target) { + return null + } + + static CidPath create(String path) { + final uri = CidPath.asUri(path) + return (CidPath) FileHelper.getOrCreateFileSystemFor(uri, DataConfig.asMap()).provider().getPath(uri) + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataPath.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataPath.groovy new file mode 100644 index 0000000000..18d98f9747 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataPath.groovy @@ -0,0 +1,33 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * Models a data path which includes the path and a checksum to validate the content of the path. 
+ * + * @author Jorge Ejarque */ enum DataType { - TaskRun, Workflow, WorkflowRun, TaskOutput, WorkflowOutput + TaskRun, Workflow, WorkflowRun, TaskOutput, WorkflowOutput, WorkflowResults } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Parameter.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Parameter.groovy new file mode 100644 index 0000000000..11cbe4ee9d --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Parameter.groovy @@ -0,0 +1,34 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * Model Workflow and Task Parameters. 
+ * + * @author Jorge Ejarque */ @@ -30,13 +31,13 @@ class TaskRun { DataType type String sessionId String name - String source - List inputs + String code + List inputs String container String conda String spack String architecture Map globalVars - List binEntries + List binEntries List annotations } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy index 643af9ec7e..c4b8824db4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy @@ -22,6 +22,7 @@ import groovy.transform.CompileStatic /** + * Models a workflow definition. * * @author Jorge Ejarque otherScriptFiles + DataPath mainScriptFile + List otherScriptFiles String repository String commitId } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowResults.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowResults.groovy new file mode 100644 index 0000000000..23d6ad179b --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowResults.groovy @@ -0,0 +1,34 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * Models the results of a workflow execution. + * + * @author Jorge Ejarque params } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy index 7bd5512480..64564b3e96 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy @@ -32,12 +32,16 @@ class DataConfig { final DataStoreOpts store DataConfig(Map opts) { - this.store = new DataStoreOpts(opts.store as Map ?: Map.of()) + this.store = new DataStoreOpts(opts.store as Map ?: [:]) + } + + static Map asMap() { + session ? (Map)session.config.navigate('workflow.data') : [:] } static DataConfig create(Session session) { if( session ) { - return new DataConfig(session.config.navigate('cid') as Map ?: Map.of()) + return new DataConfig(session.config.navigate('workflow.data') as Map ?: [:]) } else throw new IllegalStateException("Missing Nextflow session") diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index fce784b543..6d0335f9be 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -146,19 +146,7 @@ class PublishDir { final resolved = value instanceof Closure ? value.call() : value if( resolved instanceof String || resolved instanceof GString ) nullPathWarn = checkNull(resolved.toString()) - if( session?.cidEnabled ){ - final resolvedPath = FileHelper.toPath(resolved) - if (resolvedPath.isAbsolute()){ - log.warn("CID store is enabled but 'publishDir' is set to an absolute path ($resolvedPath). 
Outputs in this path will not published in the CID store") - this.path = FileHelper.toCanonicalPath(resolved) - } - else{ - this.path = session.outputDir.resolve(resolvedPath) - } - } - else { - this.path = FileHelper.toCanonicalPath(resolved) - } + this.path = FileHelper.toCanonicalPath(resolved) } void setMode( String str ) { diff --git a/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy b/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy index 1ac700e44f..498d50f41f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/ScriptRunner.groovy @@ -284,8 +284,7 @@ class ScriptRunner { } def revisionId = scriptFile.commitId ?: scriptFile.scriptId - def executionHash = session.executionHash ?: '-' - HistoryFile.DEFAULT.write( name, session.uniqueId, revisionId, executionHash, cli ) + HistoryFile.DEFAULT.write( name, session.uniqueId, revisionId, cli ) } diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index 97ccf8a6ff..dd57c4168d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -33,7 +33,7 @@ class DefaultObserverFactory implements TraceObserverFactory { } protected void createCidObserver(Collection result) { - result.add( new CidObserver() ) + result.add( new CidObserver(this.session) ) } protected void createAnsiLogObserver(Collection result) { diff --git a/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy b/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy index 92a8e93cbe..8e3b8cb73c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy @@ -33,7 +33,7 @@ import 
nextflow.exception.AbortOperationException * @author Paolo Di Tommaso */ @Slf4j -class HistoryFile extends File { +class HistoryFile extends WithLockFile { static String defaultFileName() { Const.appCacheDir.resolve('history').toString() } @@ -61,14 +61,14 @@ class HistoryFile extends File { super(file.toString()) } - void write( String name, UUID key, String revisionId, String cidHash, args, Date date = null ) { + void write( String name, UUID key, String revisionId, args, Date date = null ) { assert key assert args != null withFileLock { def timestamp = date ?: new Date() def value = args instanceof Collection ? args.join(' ') : args - this << new Record(timestamp: timestamp, runName: name, revisionId: revisionId, sessionId: key, cidHash: cidHash, command: value).toString() << '\n' + this << new Record(timestamp: timestamp, runName: name, revisionId: revisionId, sessionId: key, command: value).toString() << '\n' } } @@ -350,41 +350,6 @@ class HistoryFile extends File { } - void updateCidHash(String name, String hashCode) { - assert name - assert hashCode - try { - withFileLock {updateCidHash0(name, hashCode) } - } - catch( Throwable e ) { - log.warn "Can't update history file: $this",e - } - } - - private void updateCidHash0(String name, String hashCode){ - def newHistory = new StringBuilder() - - this.readLines().each { line -> - try { - def current = line ? 
Record.parse(line) : null - if( current?.runName == name ) { - current.cidHash = hashCode - newHistory << current.toString() << '\n' - } - else { - newHistory << line << '\n' - } - } - catch( IllegalArgumentException e ) { - log.warn("Can't read history file: $this", e) - } - } - - // rewrite the history content - this.setText(newHistory.toString()) - } - - @EqualsAndHashCode(includes = 'runName,sessionId') static class Record { Date timestamp @@ -393,7 +358,6 @@ class HistoryFile extends File { String status String revisionId UUID sessionId - String cidHash String command Record(String sessionId, String name=null) { @@ -416,7 +380,6 @@ class HistoryFile extends File { line << (status ?: '-') line << (revisionId ?: '-') line << (sessionId.toString()) - line << (cidHash ?: '-') line << (command ?: '-') } @@ -430,7 +393,7 @@ class HistoryFile extends File { if( cols.size() == 2 ) return new Record(cols[0]) - if( cols.size()== 8 ) { + if( cols.size()==7 ) { return new Record( timestamp: TIMESTAMP_FMT.parse(cols[0]), @@ -439,8 +402,7 @@ class HistoryFile extends File { status: cols[3] && cols[3] != '-' ? 
cols[3] : null, revisionId: cols[4], sessionId: UUID.fromString(cols[5]), - cidHash: cols[6], - command: cols[7] + command: cols[6] ) } @@ -448,52 +410,7 @@ class HistoryFile extends File { } } - /** - * Apply the given action by using a file lock - * - * @param action The closure implementing the action to be executed with a file lock - * @return The value returned by the action closure - */ - private withFileLock(Closure action) { - - def rnd = new Random() - long ts = System.currentTimeMillis() - String parent = this.parent ?: new File('.').absolutePath - def file = new File(parent, "${this.name}.lock".toString()) - def fos = new FileOutputStream(file) - try { - Throwable error - FileLock lock = null - try { - while( true ) { - lock = fos.getChannel().tryLock() - if( lock ) break - if( System.currentTimeMillis() - ts < 1_000 ) - sleep rnd.nextInt(75) - else { - error = new IllegalStateException("Can't lock file: ${this.absolutePath} -- Nextflow needs to run in a file system that supports file locks") - break - } - } - if( lock ) { - return action.call() - } - } - catch( Exception e ) { - return action.call() - } - finally { - if( lock?.isValid() ) lock.release() - } - - if( error ) throw error - } - finally { - fos.closeQuietly() - file.delete() - } - } Set findAllRunNames() { findAll().findResults{ it.runName } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy b/modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy new file mode 100644 index 0000000000..20f6553bb6 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy @@ -0,0 +1,78 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.util + +import java.nio.channels.FileLock + +/** + * File with a file lock. + * + * @author Jorge Ejarque + */ +class WithLockFile extends File { + + WithLockFile(String filepath){ + super(filepath) + } + + /** + * Apply the given action by using a file lock + * + * @param action The closure implementing the action to be executed with a file lock + * @return The value returned by the action closure + */ + protected withFileLock(Closure action) { + + def rnd = new Random() + long ts = System.currentTimeMillis() + String parent = this.parent ?: new File('.').absolutePath + def file = new File(parent, "${this.name}.lock".toString()) + def fos = new FileOutputStream(file) + try { + Throwable error + FileLock lock = null + + try { + while( true ) { + lock = fos.getChannel().tryLock() + if( lock ) break + if( System.currentTimeMillis() - ts < 1_000 ) + sleep rnd.nextInt(75) + else { + error = new IllegalStateException("Can't lock file: ${this.absolutePath} -- Nextflow needs to run in a file system that supports file locks") + break + } + } + if( lock ) { + return action.call() + } + } + catch( Exception e ) { + return action.call() + } + finally { + if( lock?.isValid() ) lock.release() + } + + if( error ) throw error + } + finally { + fos.closeQuietly() + file.delete() + } + } +} diff --git a/modules/nextflow/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider b/modules/nextflow/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider new file mode 100644 index 0000000000..ba80b4b30a --- 
/dev/null +++ b/modules/nextflow/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider @@ -0,0 +1,17 @@ +# +# Copyright 2013-2024, Seqera Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +nextflow.data.cid.fs.CidFileSystemProvider diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy new file mode 100644 index 0000000000..774a5cd63a --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -0,0 +1,258 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.cli + +import groovy.json.JsonOutput + +import java.nio.file.Files + +import nextflow.data.cid.CidHistoryFile +import nextflow.plugin.Plugins + +import org.junit.Rule +import spock.lang.Specification +import test.OutputCapture + +/** + * CLI cid Tests + * + * @author Jorge Ejarque + */ +class CmdCidTest extends Specification { + + def cleanup() { + Plugins.stop() + } + /* + * Read more http://mrhaki.blogspot.com.es/2015/02/spocklight-capture-and-assert-system.html + */ + @Rule + OutputCapture capture = new OutputCapture() + + def 'should print executions cids' (){ + given: + def folder = Files.createTempDirectory('test') + def configFile = folder.resolve('nextflow.config') + configFile.text = "workflow.data.store.location = '$folder'".toString() + def historyFile = folder.resolve(".meta/.history") + Files.createDirectories(historyFile.parent) + def uniqueId = UUID.randomUUID() + def date = new Date(); + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + def recordEntry = "${CidHistoryFile.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tcid://1234".toString() + historyFile.text = recordEntry + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["log"]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 2 + stdout[1] == recordEntry + + cleanup: + folder?.deleteDir() + } + + def 'should print no history' (){ + given: + def folder = Files.createTempDirectory('test') + def configFile = folder.resolve('nextflow.config') + configFile.text = "workflow.data.store.location = '$folder'".toString() + def historyFile = folder.resolve(".meta/.history") + Files.createDirectories(historyFile.parent) + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["log"]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0] == "No workflow runs CIDs found." + + cleanup: + folder?.deleteDir() + } + + def 'should show cid content' (){ + given: + def folder = Files.createTempDirectory('test') + def configFile = folder.resolve('nextflow.config') + configFile.text = "workflow.data.store.location = '$folder'".toString() + def cidFile = folder.resolve(".meta/12345/.data.json") + Files.createDirectories(cidFile.parent) + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + + def recordEntry = JsonOutput.prettyPrint('{"type":"WorkflowOutput",' + + '"path":"/path/to/file",' + + '"checksum":"45372qe",' + + '"source":"cid://123987/file.bam",' + + '"size": 1234,' + + '"createdAt": 123456789,' + + '"modifiedAt": 123456789,' + + '"annotations":null}') + cidFile.text = recordEntry + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid://12345"]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? 
line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == recordEntry.readLines().size() + stdout.join('\n') == recordEntry + + cleanup: + folder?.deleteDir() + } + + def 'should warn if no cid content' (){ + given: + def folder = Files.createTempDirectory('test') + def configFile = folder.resolve('nextflow.config') + configFile.text = "workflow.data.store.location = '$folder'".toString() + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid://12345"]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0] == "Error loading cid://12345." 
+ + cleanup: + folder?.deleteDir() + } + + def 'should get lineage cid content' (){ + given: + def folder = Files.createTempDirectory('test') + def configFile = folder.resolve('nextflow.config') + def outputHtml = folder.resolve('lineage.html') + configFile.text = "workflow.data.store.location = '$folder'".toString() + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + def cidFile = folder.resolve(".meta/12345/file.bam/.data.json") + def cidFile2 = folder.resolve(".meta/123987/file.bam/.data.json") + def cidFile3 = folder.resolve(".meta/123987/.data.json") + def cidFile4 = folder.resolve(".meta/45678/output.txt/.data.json") + def cidFile5 = folder.resolve(".meta/45678/.data.json") + Files.createDirectories(cidFile.parent) + Files.createDirectories(cidFile2.parent) + Files.createDirectories(cidFile3.parent) + Files.createDirectories(cidFile4.parent) + Files.createDirectories(cidFile5.parent) + + def recordEntry = JsonOutput.prettyPrint('{"type":"WorkflowOutput",' + + '"path":"/path/to/file","checksum":"45372qe","source":"cid://123987/file.bam",' + + '"size": 1234,"createdAt": 123456789, "modifiedAt": 123456789,"annotations":null}') + cidFile.text = recordEntry + recordEntry = JsonOutput.prettyPrint('{"type":"TaskOutput",' + + '"path":"/path/to/file","checksum":"45372qe","source":"cid://123987",' + + '"size": 1234,"createdAt": 123456789,"modifiedAt": 123456789,"annotations":null}') + cidFile2.text = recordEntry + recordEntry = JsonOutput.prettyPrint('{"type":"TaskRun",' + + '"sessionId":"u345-2346-1stw2", "name":"foo","code":"abcde2345",' + + '"inputs": [{"type": "ValueInParam","name": "sample_id","value": "ggal_gut"},' + + '{"type": "FileInParam","name": "reads","value": ["cid://45678/output.txt"]}],' + + '"container": null,"conda": null,"spack": null,"architecture": null,' + + '"globalVars": {},"binEntries": [],"annotations":null}') + cidFile3.text = recordEntry + recordEntry = 
JsonOutput.prettyPrint('{"type":"TaskOutput",' + + '"path":"/path/to/file","checksum":"45372qe","source":"cid://45678",' + + '"size": 1234,"createdAt": 123456789,"modifiedAt": 123456789,"annotations":null}') + cidFile4.text = recordEntry + recordEntry = JsonOutput.prettyPrint('{"type":"TaskRun",' + + '"sessionId":"u345-2346-1stw2", "name":"bar","code":"abfs2556",' + + '"inputs": null,"container": null,"conda": null,"spack": null,"architecture": null,' + + '"globalVars": {},"binEntries": [],"annotations":null}') + cidFile5.text = recordEntry + final network = """flowchart BT + cid://12345/file.bam@{shape: document, label: "cid://12345/file.bam"} + cid://123987/file.bam@{shape: document, label: "cid://123987/file.bam"} + cid://123987@{shape: process, label: "foo"} + ggal_gut@{shape: document, label: "ggal_gut"} + cid://45678/output.txt@{shape: document, label: "cid://45678/output.txt"} + cid://45678@{shape: process, label: "bar"} + + cid://123987/file.bam -->cid://12345/file.bam + cid://123987 -->cid://123987/file.bam + ggal_gut -->cid://123987 + cid://45678/output.txt -->cid://123987 + cid://45678 -->cid://45678/output.txt +""" + final template = CmdCid.CmdLineage.readTemplate() + def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) + + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["lineage", "cid://12345/file.bam", outputHtml.toString()]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 1 + stdout[0] == "Linage graph for cid://12345/file.bam rendered in ${outputHtml}" + outputHtml.exists() + outputHtml.text == expectedOutput + + + cleanup: + folder?.deleteDir() + } + + + + + + +} diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy index 56aced51b9..8deff84359 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLogTest.groovy @@ -97,7 +97,7 @@ class CmdLogTest extends Specification { cache.close() def history = new HistoryFile(folder.resolve(HistoryFile.defaultFileName())) - history.write(runName,uuid,'b3d3aca8eb','-','run') + history.write(runName,uuid,'b3d3aca8eb','run') when: def log = new CmdLog(basePath: folder, args: [runName]) @@ -167,7 +167,7 @@ class CmdLogTest extends Specification { cache.close() def history = new HistoryFile(folder.resolve(HistoryFile.defaultFileName())) - history.write(runName,uuid,'b3d3aca8eb','-','run') + history.write(runName,uuid,'b3d3aca8eb','run') when: diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy new file mode 100644 index 0000000000..2b3412466d --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy @@ -0,0 +1,158 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package nextflow.data.cid + +import spock.lang.Specification +import spock.lang.TempDir + +import java.nio.file.Files +import java.nio.file.Path + +/** + * CID History file tests + * + * @author Jorge Ejarque + */ +class CidHistoryFileTest extends Specification { + + @TempDir + Path tempDir + + Path historyFile + CidHistoryFile cidHistoryFile + + def setup() { + historyFile = tempDir.resolve("cid-history.txt") + Files.createFile(historyFile) + cidHistoryFile = new CidHistoryFile(historyFile) + } + + def "write should append a new record to the file"() { + given: + UUID sessionId = UUID.randomUUID() + String runName = "TestRun" + String runCid = "cid://123" + + when: + cidHistoryFile.write(runName, sessionId, runCid) + + then: + def lines = Files.readAllLines(historyFile) + lines.size() == 1 + def parsedRecord = CidHistoryFile.CidRecord.parse(lines[0]) + parsedRecord.sessionId == sessionId + parsedRecord.runName == runName + parsedRecord.runCid == runCid + } + + def "getRunCid should return correct runCid for existing session"() { + given: + UUID sessionId = UUID.randomUUID() + String runName = "Run1" + String runCid = "cid://123" + + and: + cidHistoryFile.write(runName, sessionId, runCid) + + expect: + cidHistoryFile.getRunCid(sessionId) == runCid + } + + def "getRunCid should return null if session does not exist"() { + expect: + cidHistoryFile.getRunCid(UUID.randomUUID()) == null + } + + def "update should modify existing runCid for given session"() { + given: + UUID sessionId = UUID.randomUUID() + String runName = "Run1" + String initialCid = "cid-abc" + String updatedCid = "cid-updated" + + and: + cidHistoryFile.write(runName, sessionId, initialCid) + + when: + cidHistoryFile.update(sessionId, updatedCid) + + then: + def lines = Files.readAllLines(historyFile) + lines.size() == 1 + def parsedRecord = CidHistoryFile.CidRecord.parse(lines[0]) + 
parsedRecord.runCid == updatedCid + } + + def "update should do nothing if session does not exist"() { + given: + UUID existingSessionId = UUID.randomUUID() + UUID nonExistingSessionId = UUID.randomUUID() + String runName = "Run1" + String runCid = "cid://123" + + and: + cidHistoryFile.write(runName, existingSessionId, runCid) + + when: + cidHistoryFile.update(nonExistingSessionId, "new-cid") + + then: + def lines = Files.readAllLines(historyFile) + lines.size() == 1 + def parsedRecord = CidHistoryFile.CidRecord.parse(lines[0]) + parsedRecord.runCid == runCid + } + + def "CidRecord parse should throw for invalid record"() { + when: + CidHistoryFile.CidRecord.parse("invalid-record") + + then: + thrown(IllegalArgumentException) + } + + def "CidRecord parse should handle 4-column record"() { + given: + def timestamp = new Date() + def formattedTimestamp = CidHistoryFile.TIMESTAMP_FMT.format(timestamp) + def line = "${formattedTimestamp}\trun-1\t${UUID.randomUUID()}\tcid://123" + + when: + def record = CidHistoryFile.CidRecord.parse(line) + + then: + record.timestamp != null + record.runName == "run-1" + record.runCid == "cid://123" + } + + def "CidRecord toString should produce tab-separated format"() { + given: + UUID sessionId = UUID.randomUUID() + def record = new CidHistoryFile.CidRecord(sessionId, "TestRun") + record.timestamp = new Date() + record.runCid = "cid://123" + + when: + def line = record.toString() + + then: + line.contains("\t") + line.split("\t").size() == 4 + } +} + diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index a5c1a3c426..fee5957b76 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -19,41 +19,85 @@ package nextflow.data.cid import groovy.json.JsonOutput import nextflow.data.config.DataConfig +import 
nextflow.processor.TaskConfig import nextflow.processor.TaskProcessor +import nextflow.script.ScriptBinding +import nextflow.script.WorkflowMetadata import nextflow.util.CacheHelper import nextflow.util.PathNormalizer import java.nio.file.Files import java.nio.file.Path import java.nio.file.attribute.BasicFileAttributes -import java.nio.file.attribute.FileTime -import java.time.Instant import com.google.common.hash.HashCode import nextflow.Session import nextflow.processor.TaskId import nextflow.processor.TaskRun import spock.lang.Specification + +import static nextflow.data.cid.fs.CidPath.CID_PROT + /** * * @author Paolo Di Tommaso */ class CidObserverTest extends Specification { + def 'should save workflow' (){ + given: + def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + def store = new DefaultCidStore(); + def uniqueId = UUID.randomUUID() + def scriptFile = folder.resolve("main.nf") + def metadata = Mock(WorkflowMetadata){ + getRepository() >> "https://nextflow.io/nf-test/" + getCommitId() >> "123456" + getScriptId() >> "78910" + getScriptFile() >> scriptFile + getProjectDir() >> folder.resolve("projectDir") + getWorkDir() >> folder.resolve("workDir") + } + def session = Mock(Session) { + getConfig() >> config + getCidStore() >> store + getUniqueId() >> uniqueId + getRunName() >> "test_run" + getWorkflowMetadata() >> metadata + getParams() >> new ScriptBinding.ParamsMap() + } + store.open(DataConfig.create(session)) + def observer = new CidObserver(session) + def expectedString = '{"type":"WorkflowRun","workflow":{"type": "Workflow",' + + '"mainScriptFile":{"path":"file://' + scriptFile.toString() + '", "checksum": "78910"},' + + '"otherScriptFiles": [], "repository": "https://nextflow.io/nf-test/",' + + '"commitId": "123456" },' + + '"sessionId": "' + uniqueId + '",' + + '"name": "test_run", "params": []}' + when: + observer.onFlowBegin() + then: + 
folder.resolve(".meta/${observer.executionHash}/.data.json").text == JsonOutput.prettyPrint(expectedString) + + cleanup: + folder?.deleteDir() + } + def 'should save task run' () { given: def folder = Files.createTempDirectory('test') - def config = [cid:[store:[location:folder.toString()]]] + def config = [workflow:[data:[store:[location:folder.toString()]]]] def store = new DefaultCidStore(); def uniqueId = UUID.randomUUID() def session = Mock(Session) { getConfig()>>config getCidStore()>>store getUniqueId()>>uniqueId + getRunName()>>"test_run" } store.open(DataConfig.create(session)) - def observer = new CidObserver() - observer.onFlowCreate(session) + def observer = new CidObserver(session) and: def hash = HashCode.fromInt(123456789) and: @@ -68,13 +112,14 @@ class CidObserverTest extends Specification { getProcessor() >> processor getSource() >> 'echo task source' } + def sourceHash =CacheHelper.hasher('echo task source').hash().toString() def normalizer = Mock(PathNormalizer.class) { normalizePath( _ as Path) >> {Path p -> p?.toString()} normalizePath( _ as String) >> {String p -> p} } def expectedString = '{"type":"TaskRun",' + '"sessionId":"'+uniqueId.toString() + '",' + - '"name":"foo","source":"echo task source",' + + '"name":"foo","code":"' + sourceHash + '",' + '"inputs": null,"container": null,"conda": null,' + '"spack": null,"architecture": null,' + '"globalVars": {},"binEntries": [],"annotations":null}' @@ -90,15 +135,14 @@ class CidObserverTest extends Specification { def 'should save task output' () { given: def folder = Files.createTempDirectory('test') - def config = [cid:[store:[location:folder.toString()]]] + def config = [workflow:[data:[store:[location:folder.toString()]]]] def store = new DefaultCidStore(); def session = Mock(Session) { getConfig()>>config getCidStore()>>store } store.open(DataConfig.create(session)) - def observer = Spy(new CidObserver()) - observer.onFlowCreate(session) + def observer = Spy(new CidObserver(session)) and: 
def workDir = folder.resolve('12/34567890') Files.createDirectories(workDir) @@ -140,4 +184,205 @@ class CidObserverTest extends Specification { folder?.deleteDir() } + def 'should relativise task output dirs' (){ + when: + def config = [workflow:[data:[store:[location:'cid']]]] + def store = new DefaultCidStore(); + def session = Mock(Session) { + getConfig()>>config + getCidStore()>>store + } + def hash = HashCode.fromInt(123456789) + def taskConfig = Mock(TaskConfig){ + getStoreDir() >> STORE_DIR + } + def task = Mock(TaskRun) { + getId() >> TaskId.of(100) + getName() >> 'foo' + getHash() >> hash + getWorkDir() >> WORK_DIR + getConfig() >> taskConfig + } + store.open(DataConfig.create(session)) + def observer = new CidObserver(session) + then: + observer.getTaskRelative(task, PATH) == EXPECTED + where: + WORK_DIR | STORE_DIR | PATH | EXPECTED + Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('/path/to/work/12/3456789/relative') | "relative" + Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('/path/to/storeDir/relative') | "relative" + Path.of('work/12/3456789') | Path.of('storeDir') | Path.of('work/12/3456789/relative') | "relative" + Path.of('work/12/3456789') | Path.of('storeDir') | Path.of('storeDir/relative') | "relative" + Path.of('work/12/3456789') | Path.of('storeDir') | Path.of('results/relative') | "results/relative" + Path.of('/path/to/work/12/3456789') | Path.of('storeDir') | Path.of('./relative') | "relative" + } + + def 'should return exception when relativize task output dirs' (){ + when: + def config = [workflow:[data:[store:[location:'cid']]]] + def store = new DefaultCidStore(); + def session = Mock(Session) { + getConfig()>>config + getCidStore()>>store + } + def hash = HashCode.fromInt(123456789) + def taskConfig = Mock(TaskConfig){ + getStoreDir() >> STORE_DIR + } + def task = Mock(TaskRun) { + getId() >> TaskId.of(100) + getName() >> 'foo' + getHash() >> hash + getWorkDir() >> WORK_DIR 
+ getConfig() >> taskConfig + } + store.open(DataConfig.create(session)) + def observer = new CidObserver(session) + observer.getTaskRelative(task, PATH) + then: + def e = thrown(Exception) + e.message == "Cannot asses the relative path for output $PATH of ${task.name}".toString() + + where: + WORK_DIR | STORE_DIR | PATH + Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('/another/path/relative') + Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('../path/to/storeDir/relative') + } + + def 'should relativise workflow output dirs' (){ + when: + def config = [workflow:[data:[store:[location:'cid']]]] + def store = new DefaultCidStore(); + def session = Mock(Session) { + getOutputDir()>>OUTPUT_DIR + getConfig()>>config + getCidStore()>>store + } + store.open(DataConfig.create(session)) + def observer = new CidObserver(session) + then: + observer.getWorkflowRelative(PATH) == EXPECTED + where: + OUTPUT_DIR | PATH | EXPECTED + Path.of('/path/to/outDir') | Path.of('/path/to/outDir/relative') | "relative" + Path.of('outDir') | Path.of('outDir/relative') | "relative" + Path.of('/path/to/outDir') | Path.of('results/relative') | "results/relative" + Path.of('/path/to/outDir') | Path.of('./relative') | "relative" + + + } + + def 'should return exception when relativise workflow output dirs' (){ + when: + def config = [workflow:[data:[store:[location:'cid']]]] + def store = new DefaultCidStore(); + def session = Mock(Session) { + getOutputDir()>>OUTPUT_DIR + getConfig()>>config + getCidStore()>>store + } + def observer = new CidObserver(session) + observer.getWorkflowRelative(PATH) + then: + def e = thrown(Exception) + e.message == "Cannot asses the relative path for workflow output $PATH" + where: + OUTPUT_DIR | PATH | EXPECTED + Path.of('/path/to/outDir') | Path.of('/another/path/') | "relative" + Path.of('/path/to/outDir') | Path.of('../relative') | "relative" + + + } + + def 'should save workflow output' (){ + given: + 
def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + def store = new DefaultCidStore(); + def outputDir = folder.resolve('results') + def uniqueId = UUID.randomUUID() + def scriptFile = folder.resolve("main.nf") + def workDir= folder.resolve("work") + def metadata = Mock(WorkflowMetadata){ + getRepository() >> "https://nextflow.io/nf-test/" + getCommitId() >> "123456" + getScriptId() >> "78910" + getScriptFile() >> scriptFile + getProjectDir() >> folder.resolve("projectDir") + getWorkDir() >> workDir + } + def session = Mock(Session) { + getConfig()>>config + getCidStore()>>store + getOutputDir()>>outputDir + getWorkDir() >> workDir + getWorkflowMetadata()>>metadata + getUniqueId()>>uniqueId + getRunName()>>"test_run" + getParams() >> new ScriptBinding.ParamsMap() + } + store.open(DataConfig.create(session)) + def observer = new CidObserver(session) + + when: 'Starting workflow' + observer.onFlowCreate(session) + observer.onFlowBegin() + then: 'History file should contain execution hash' + def cid = store.getHistoryFile().getRunCid(uniqueId).substring(CID_PROT.size()) + cid == observer.executionHash + + when: ' publish output with source file' + def outFile1 = outputDir.resolve('foo/file.bam') + Files.createDirectories(outFile1.parent) + outFile1.text = 'some data1' + def sourceFile1 = workDir.resolve('12/3987/file.bam') + Files.createDirectories(sourceFile1.parent) + sourceFile1.text = 'some data1' + observer.onFilePublish(outFile1, sourceFile1) + then: 'check file 1 output metadata in cid store' + def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes) + def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() + def expectedString1 = '{"type":"WorkflowOutput",' + + '"path":"' + outFile1.toString() + '",' + + '"checksum":"'+ fileHash1 + '",' + + '"source":"cid://123987/file.bam",' + + '"size":'+attrs1.size() + ',' + + '"createdAt":' + attrs1.creationTime().toMillis() + ',' + + 
'"modifiedAt":'+ attrs1.lastModifiedTime().toMillis() + ',' + + '"annotations":null}' + folder.resolve(".meta/${observer.executionHash}/foo/file.bam/.data.json").text == JsonOutput.prettyPrint(expectedString1) + + when: 'publish without source path' + def outFile2 = outputDir.resolve('foo/file2.bam') + Files.createDirectories(outFile2.parent) + outFile2.text = 'some data2' + def attrs2 = Files.readAttributes(outFile2, BasicFileAttributes) + def fileHash2 = CacheHelper.hasher(outFile2).hash().toString() + observer.onFilePublish(outFile2) + then: 'Check outFile2 metadata in cid store' + def expectedString2 = '{"type":"WorkflowOutput",' + + '"path":"' + outFile2.toString() + '",' + + '"checksum":"'+ fileHash2 + '",' + + '"source":"cid://' + observer.executionHash +'",' + + '"size":'+attrs2.size() + ',' + + '"createdAt":' + attrs2.creationTime().toMillis() + ',' + + '"modifiedAt":'+ attrs2.lastModifiedTime().toMillis() + ',' + + '"annotations":null}' + folder.resolve(".meta/${observer.executionHash}/foo/file2.bam/.data.json").text == JsonOutput.prettyPrint(expectedString2) + + when: 'Workflow complete' + observer.onFlowComplete() + then: 'Check history file is updated and Workflow Result is written in the cid store' + def expectedString3 = '{"type":"WorkflowResults",' + + '"run":"cid://' + observer.executionHash +'",' + + '"outputs": [ "cid://'+ observer.executionHash + '/foo/file.bam",' + + '"cid://'+ observer.executionHash + '/foo/file2.bam" ]}' + def finalCid = store.getHistoryFile().getRunCid(uniqueId).substring(CID_PROT.size()) + finalCid != observer.executionHash + folder.resolve(".meta/${finalCid}/.data.json").text == JsonOutput.prettyPrint(expectedString3) + + cleanup: + folder?.deleteDir() + } + } diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy new file mode 100644 index 0000000000..72979f580b --- /dev/null +++ 
b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy @@ -0,0 +1,372 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.fs + +import spock.lang.Shared + +import java.nio.ByteBuffer +import java.nio.file.FileSystemNotFoundException +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.ProviderMismatchException +import java.nio.file.StandardOpenOption +import java.nio.file.attribute.BasicFileAttributes + +import nextflow.Global +import nextflow.Session +import spock.lang.Specification + +/** + * CID File system provider tests + * @author Jorge Ejarque + */ +class CidFileSystemProviderTest extends Specification { + + @Shared def wdir = Files.createTempDirectory('wdir') + @Shared def meta = wdir.resolve('.meta') + @Shared def data = wdir.resolve('work') + + def setupSpec(){ + meta.mkdirs() + data.mkdirs() + } + + def cleanupSpec(){ + wdir.deleteDir() + } + + def 'should return cid scheme' () { + given: + def provider = new CidFileSystemProvider() + expect: + provider.getScheme() == 'cid' + } + + def 'should get cid path' () { + given: + def cid = Mock(CidPath) + and: + def provider = new CidFileSystemProvider() + expect: + provider.toCidPath(cid) == cid + + when: + provider.toCidPath(Path.of('foo')) + then: + thrown(ProviderMismatchException) + } + + def 'should create new file system' () { + given: + def provider = new 
CidFileSystemProvider() + def config = [store:[location:'/data']] + def cid = CidPath.asUri('cid://12345') + when: + def fs = provider.newFileSystem(cid, config) as CidFileSystem + then: + fs.basePath == Path.of('/data/.meta') + } + + def 'should get a file system' () { + given: + def provider = new CidFileSystemProvider() + def config = [store:[location:'/data']] + def uri = CidPath.asUri('cid://12345') + when: + provider.getFileSystem(uri) + then: + thrown(FileSystemNotFoundException) + + when: + provider.newFileSystem(uri, config) as CidFileSystem + and: + def result = provider.getFileSystem(uri) as CidFileSystem + then: + result.basePath == Path.of('/data/.meta') + } + + def 'should get or create a file system' () { + given: + def config = [workflow:[data:[store:[location:'/this/that']]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def uri = CidPath.asUri('cid://12345') + def provider = new CidFileSystemProvider() + + when: + def fs = provider.getFileSystemOrCreate(uri) as CidFileSystem + then: + fs.basePath == Path.of('/this/that/.meta') + + when: + def fs2 = provider.getFileSystemOrCreate(uri) as CidFileSystem + then: + fs2.is(fs) + } + + def 'should get a path' () { + given: + def config = [workflow:[data:[store:[location:'/data']]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def uri1 = CidPath.asUri('cid://12345') + def uri2 = CidPath.asUri('cid://12345/foo/bar') + + when: + def cid1 = provider.getPath(uri1) + then: + cid1.getTargetPath() == Path.of('/data/.meta/12345') + + when: + def cid2 = provider.getPath(uri2) + then: + cid2.getTargetPath() == Path.of('/data/.meta/12345/foo/bar') + } + + def 'should create new byte channel' () { + given: + def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def outputMeta = meta.resolve("12345/output.txt") + def output = data.resolve("output.txt") + output.text = "Hello, World!" 
+ outputMeta.mkdirs() + outputMeta.resolve(".data.json").text = '{"type":"WorkflowOutput","path":"'+output.toString()+'"}' + + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345/output.txt')) + def opts = Set.of(StandardOpenOption.READ) + when: + def channel = provider.newByteChannel(cid, opts) + and: + def buffer = ByteBuffer.allocate(1000); + def read = channel.read(buffer) + channel.close() + def bytes = new byte[read] + buffer.get(0,bytes) + then: + bytes == "Hello, World!".getBytes() + + cleanup: + outputMeta.deleteDir() + output.delete() + } + + def 'should read cid' () { + given: + def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def outputMeta = meta.resolve("12345/output.txt") + def output = data.resolve("output.txt") + output.text = "Hello, World!" + outputMeta.mkdirs() + outputMeta.resolve(".data.json").text = '{"type":"WorkflowOutput","path":"'+output.toString()+'"}' + + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345/output.txt')) + def opts = Set.of(StandardOpenOption.READ) + + expect: + cid.text == "Hello, World!" 
+ + cleanup: + outputMeta.deleteDir() + output.delete() + } + + def 'should not create a directory' () { + given: + def config = [workflow:[data:[store:[location:'test']]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345')) + + when: + provider.createDirectory(cid) + then: + thrown(UnsupportedOperationException) + + } + + def 'should create directory stream' () { + given: + def output1 = data.resolve('path') + output1.mkdir() + output1.resolve('file1.txt').text = 'file1' + output1.resolve('file2.txt').text = 'file2' + output1.resolve('file3.txt').text = 'file3' + meta.resolve('12345/output1').mkdirs() + meta.resolve('12345/output2').mkdirs() + meta.resolve('12345/.data.json').text = '{"type":"TaskRun"}' + meta.resolve('12345/output1/.data.json').text = '{"type":"TaskOutput", "path": "' + output1.toString() + '"}' + + and: + def config = [workflow:[data:[store:[location:wdir.toString()]]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345/output1')) + def cid2 = provider.getPath(CidPath.asUri('cid://12345')) + + expect: + Files.exists(cid) + Files.exists(cid.resolve('file1.txt')) + Files.exists(cid.resolve('file2.txt')) + Files.exists(cid.resolve('file3.txt')) + + when: + def stream = provider.newDirectoryStream(cid2, (p) -> true) + and: + def result = stream.toList() + then: + result.toSet() == [ + cid2.resolve('output1'), + cid2.resolve('output2'), + ] as Set + + when: + def stream2 = provider.newDirectoryStream(cid, (p) -> true) + and: + def result2 = stream2.toList() + then: + result2.toSet() == [ + cid.resolve('file1.txt'), + cid.resolve('file2.txt'), + cid.resolve('file3.txt') + ] as Set + + } + + def 'should not delete a file' () { + given: + def config = [workflow:[data:[store:[location:'test']]]] + Global.session = Mock(Session) 
{ getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345')) + + when: + provider.delete(cid) + then: + thrown(UnsupportedOperationException) + + } + + def 'should not copy a file' () { + given: + def config = [workflow:[data:[store:[location:'test']]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + def cid2 = provider.getPath(CidPath.asUri('cid://54321/foo')) + + when: + provider.copy(cid1, cid2) + then: + thrown(UnsupportedOperationException) + } + + def 'should not move a file' () { + given: + def config = [workflow:[data:[store:[location:'test']]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + def cid2 = provider.getPath(CidPath.asUri('cid://54321/foo')) + + when: + provider.move(cid1, cid2) + then: + thrown(UnsupportedOperationException) + } + + def 'should check is same file' () { + given: + def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + def cid2 = provider.getPath(CidPath.asUri('cid://54321/foo')) + def cid3 = provider.getPath(CidPath.asUri('cid://54321/foo')) + + expect: + !provider.isSameFile(cid1, cid2) + !provider.isSameFile(cid1, cid3) + and: + provider.isSameFile(cid2, cid3) + + cleanup: + folder?.deleteDir() + } + + def 'should check is hidden file' () { + given: + def folder = Files.createTempDirectory('test') + def config = [workflow:[data:[store:[location:folder.toString()]]]] + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() 
+ def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + def cid2 = provider.getPath(CidPath.asUri('cid://54321/.foo')) + + expect: + !provider.isHidden(cid1) + provider.isHidden(cid2) + + cleanup: + folder?.deleteDir() + } + + def 'should read file attributes' () { + given: + def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def file = data.resolve('abc') + file.text = 'Hello' + meta.resolve('12345/abc').mkdirs() + meta.resolve('12345/abc/.data.json').text = '{"type":"TaskOutput", "path": "' + file.toString() + '"}' + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + + when: + def attr1 = provider.readAttributes(cid1, BasicFileAttributes) + def real1= Files.readAttributes(file,BasicFileAttributes) + then: + !attr1.directory + attr1.isRegularFile() + attr1.size() == real1.size() + attr1.creationTime() == real1.creationTime() + attr1.lastModifiedTime() == real1.lastModifiedTime() + attr1.lastAccessTime() == real1.lastAccessTime() + + cleanup: + file?.delete() + meta.resolve('12345').deleteDir() + } + +} + diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy new file mode 100644 index 0000000000..fc2592d170 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy @@ -0,0 +1,280 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.fs + +import java.nio.file.Files +import java.nio.file.Path + +import spock.lang.Shared +import spock.lang.Specification +import spock.lang.Unroll + +/** + * CID Path Tests + * @author Jorge Ejarque + */ +class CidPathTest extends Specification { + + @Shared def BASE = Path.of('/some/base/data') + @Shared def fs = Mock(CidFileSystem){ getBasePath() >> BASE } + @Shared def wdir = Files.createTempDirectory('wdir') + @Shared def cid = wdir.resolve('.meta') + @Shared def data = wdir.resolve('work') + + def cleanupSpec(){ + wdir.deleteDir() + } + + def 'should create correct cid Path' () { + when: + def cid = new CidPath(FS, PATH, MORE) + then: + cid.storePath == EXPECTED_STORE + cid.filePath == EXPECTED_FILE + where: + FS | PATH | MORE | EXPECTED_STORE | EXPECTED_FILE + fs | '/' | [] as String[] | BASE | '/' + null | '/' | [] as String[] | Path.of('/') | '/' + fs | '/' | ['a','b'] as String[] | BASE.resolve('a/b') | 'a/b' + null | '/' | ['a','b'] as String[] | Path.of('a/b') | 'a/b' + fs | '' | [] as String[] | BASE | '/' + null | '' | [] as String[] | Path.of('/') | '/' + fs | '' | ['a','b'] as String[] | BASE.resolve('a/b') | 'a/b' + null | '' | ['a','b'] as String[] | Path.of('a/b') | 'a/b' + fs | '1234' | [] as String[] | BASE.resolve('1234') | '1234' + null | '1234' | [] as String[] | Path.of('1234') | '1234' + fs | '1234' | ['a','b'] as String[] | BASE.resolve('1234/a/b') | '1234/a/b' + null | '1234' | ['a','b'] as String[] | Path.of('1234/a/b') | '1234/a/b' + fs | '1234/c' | [] as String[] | BASE.resolve('1234/c') | '1234/c' + null | '1234/c' | [] as String[] | Path.of('1234/c') | '1234/c' + fs | '1234/c' | ['a','b'] as String[] | BASE.resolve('1234/c/a/b') | '1234/c/a/b' + null | '1234/c' | ['a','b'] as String[] | Path.of('1234/c/a/b') | '1234/c/a/b' + fs | '/1234/c' | [] as String[] | BASE.resolve('1234/c') | '1234/c' 
+ null | '/1234/c' | [] as String[] | Path.of('1234/c') | '1234/c' + fs | '/1234/c' | ['a','b'] as String[] | BASE.resolve('1234/c/a/b') | '1234/c/a/b' + null | '/1234/c' | ['a','b'] as String[] | Path.of('1234/c/a/b') | '1234/c/a/b' + } + + def 'should get target path' () { + given: + def output1 = data.resolve('output') + output1.resolve('some/path').mkdirs() + output1.resolve('some/path/file1.txt').text = "this is file1" + def output2 = data.resolve('file2.txt') + output2.text = "this is file2" + def cidFs = Mock(CidFileSystem){ getBasePath() >> cid } + cid.resolve('12345/output1').mkdirs() + cid.resolve('12345/path/to/file2.txt').mkdirs() + cid.resolve('12345/.data.json').text = '{"type":"TaskRun"}' + cid.resolve('12345/output1/.data.json').text = '{"type":"TaskOutput", "path": "' + output1.toString() + '"}' + cid.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"TaskOutput", "path": "' + output2.toString() + '"}' + + expect: + new CidPath(cidFs, PATH).getTargetPath() == EXPECTED + where: + PATH | EXPECTED + '/' | cid + '12345' | cid.resolve('12345') + '12345/output1' | data.resolve('output') + '12345/output1/some/path' | data.resolve('output/some/path') + '12345/path/to/' | cid.resolve('12345/path/to/') + '12345/path/to/file2.txt/' | data.resolve('file2.txt') + } + + def 'should get file name' () { + when: + def cid1 = new CidPath(fs, '1234567890/this/file.bam') + then: + cid1.getFileName() == new CidPath(null, 'file.bam') + } + + def 'should get file parent' () { + when: + def cid1 = new CidPath(fs, '1234567890/this/file.bam') + then: + cid1.getParent() == new CidPath(fs, '1234567890/this') + cid1.getParent().getParent() == new CidPath(fs, '1234567890') + cid1.getParent().getParent().getParent() == new CidPath(fs, "/") + cid1.getParent().getParent().getParent().getParent() == null + } + + @Unroll + def 'should get name count' () { + expect: + new CidPath(fs, PATH).getNameCount() == EXPECTED + where: + PATH | EXPECTED + '/' | 0 + '123' | 1 + 
'123/a' | 2 + '123/a/' | 2 + '123/a/b' | 3 + '' | 0 + } + + @Unroll + def 'should get name by index' () { + expect: + new CidPath(fs, PATH).getName(INDEX) == EXPECTED + where: + PATH | INDEX | EXPECTED + '123' | 0 | new CidPath(fs, '123') + '123/a' | 1 | new CidPath(null, 'a') + '123/a/' | 1 | new CidPath(null, 'a') + '123/a/b' | 2 | new CidPath(null, 'b') + } + + @Unroll + def 'should get subpath' () { + expect: + new CidPath(fs, PATH).subpath(BEGIN,END) == EXPECTED + where: + PATH | BEGIN | END | EXPECTED + '123' | 0 | 1 | new CidPath(fs, '123') + '123/a' | 0 | 2 | new CidPath(fs, '123/a') + '123/a/' | 0 | 2 | new CidPath(fs, '123/a') + '123/a' | 1 | 2 | new CidPath(null, 'a') + '123/a/' | 1 | 2 | new CidPath(null, 'a') + '123/a/b' | 2 | 3 | new CidPath(null, 'b') + '123/a/b' | 1 | 3 | new CidPath(null, 'a/b') + } + + def 'should normalize a path' () { + expect: + new CidPath(fs, '123').normalize() == new CidPath(fs, '123') + new CidPath(fs, '123/a/b').normalize() == new CidPath(fs, '123/a/b') + new CidPath(fs, '123/./a/b').normalize() == new CidPath(fs, '123/a/b') + new CidPath(fs, '123/a/../a/b').normalize() == new CidPath(fs, '123/a/b') + } + + @Unroll + def 'should validate startWith' () { + expect: + new CidPath(fs,PATH).startsWith(OTHER) == EXPECTED + where: + PATH | OTHER | EXPECTED + '12345/a/b' | '12345' | true + '12345/a/b' | '12345/a' | true + '12345/a/b' | '12345/a/b' | true + and: + '12345/a/b' | '12345/b' | false + '12345/a/b' | 'xyz' | false + } + + @Unroll + def 'should validate endsWith' () { + expect: + new CidPath(fs,PATH).endsWith(OTHER) == EXPECTED + where: + PATH | OTHER | EXPECTED + '12345/a/b' | 'b' | true + '12345/a/b' | 'a/b' | true + '12345/a/b' | '12345/a/b' | true + and: + '12345/a/b' | '12345/b' | false + '12345/a/b' | 'xyz' | false + } + + def 'should validate isAbsolute' () { + expect: + new CidPath(fs,'1234/a/b/c').isAbsolute() + new CidPath(fs,'1234/a/b/c').getRoot().isAbsolute() + new 
CidPath(fs,'1234/a/b/c').getParent().isAbsolute() + new CidPath(fs,'1234/a/b/c').normalize().isAbsolute() + new CidPath(fs,'1234/a/b/c').getName(0).isAbsolute() + new CidPath(fs,'1234/a/b/c').subpath(0,2).isAbsolute() + and: + !new CidPath(fs,'1234/a/b/c').getFileName().isAbsolute() + !new CidPath(fs,'1234/a/b/c').getName(1).isAbsolute() + !new CidPath(fs,'1234/a/b/c').subpath(1,3).isAbsolute() + } + + @Unroll + def 'should get root path' () { + expect: + new CidPath(fs,PATH).getRoot() == new CidPath(fs,EXPECTED) + where: + PATH | EXPECTED + '12345' | '/' + '12345/a' | '/' + } + + def 'should resolve path' () { + when: + def cid1 = new CidPath(fs, '123/a/b/c') + def cid2 = new CidPath(fs, '321/x/y/z') + def rel1 = new CidPath(null, 'foo') + def rel2 = new CidPath(null, 'bar/') + + then: + cid1.resolve(cid2) == cid2 + cid2.resolve(cid1) == cid1 + and: + cid1.resolve(rel1) == new CidPath(fs,'123/a/b/c/foo') + cid1.resolve(rel2) == new CidPath(fs,'123/a/b/c/bar') + and: + rel1.resolve(rel2) == new CidPath(null, 'foo/bar') + rel2.resolve(rel1) == new CidPath(null, 'bar/foo') + } + + def 'should resolve path as string' () { + given: + def pr = Mock(CidFileSystemProvider) + def cidfs = Mock(CidFileSystem){ + getBasePath() >> BASE + provider() >> pr} + + + def cid1 = new CidPath(cidfs, '123/a/b/c') + + expect: + cid1.resolve('x/y') == new CidPath(cidfs, '123/a/b/c/x/y') + cid1.resolve('/x/y/') == new CidPath(cidfs, '123/a/b/c/x/y') + + when: + def result = cid1.resolve('cid://321') + then: + pr.getPath(CidPath.asUri('cid://321')) >> new CidPath(cidfs, '321') + and: + result == new CidPath(cidfs, '321') + } + + @Unroll + def 'should get to uri string' () { + expect: + new CidPath(fs, PATH).toUriString() == EXPECTED + where: + PATH | EXPECTED + '/' | 'cid:///' + '1234' | 'cid://1234' + '1234/a/b/c' | 'cid://1234/a/b/c' + '' | 'cid:///' + } + + @Unroll + def 'should get string' () { + expect: + new CidPath(fs, PATH).toString() == EXPECTED + where: + PATH | EXPECTED + '/' | 
'/' + '1234' | '1234' + '1234/a/b/c' | '1234/a/b/c' + '' | '/' + } +} diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy new file mode 100644 index 0000000000..800a60f637 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy @@ -0,0 +1,88 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.fs + +import java.nio.file.Path + +import nextflow.Global +import nextflow.Session +import spock.lang.Specification +import spock.lang.Unroll + +/** + * CID Path Factory tests. 
+ *
+ * @author Jorge Ejarque
+ */
+class CifPathFactoryTest extends Specification {
+
+    // Install a mocked session whose config places the data store at '/some/data'.
+    def setup() {
+        Global.session = Mock(Session) { getConfig()>> [workflow:[data:[store:[location: '/some/data']]]] }
+    }
+
+    // Reset the global session after each feature method.
+    def cleanup() {
+        Global.session = null
+    }
+
+    // Checks URI parsing: non-cid strings yield null, cid URIs map under '<location>/.meta',
+    // redundant slashes are collapsed, and a URI with an empty authority is rejected.
+    def 'should create cid path' () {
+        given:
+        def factory = new CidPathFactory()
+
+        expect:
+        factory.parseUri('foo') == null
+
+        when:
+        def p1 = factory.parseUri('cid://12345')
+        then:
+        p1.getTargetPath() == Path.of('/some/data/.meta/12345')
+        p1.toUriString() == 'cid://12345'
+
+        when:
+        def p2 = factory.parseUri('cid://12345/x/y/z')
+        then:
+        p2.getTargetPath() == Path.of('/some/data/.meta/12345/x/y/z')
+        p2.toUriString() == 'cid://12345/x/y/z'
+
+        when:
+        def p3 = factory.parseUri('cid://12345//x///y/z//')
+        then:
+        p3.getTargetPath() == Path.of('/some/data/.meta/12345/x/y/z')
+        // assert on p3 itself: the URI string of the un-normalised input is what is under test here
+        p3.toUriString() == 'cid://12345/x/y/z'
+
+        when:
+        factory.parseUri('cid:///12345')
+        then:
+        thrown(IllegalArgumentException)
+    }
+
+    // Round-trip: a path created from a cid URI string converts back to the same string.
+    @Unroll
+    def 'should convert get cid uri string' () {
+        given:
+        def factory = new CidPathFactory()
+
+        when:
+        def cid = CidPathFactory.create(EXPECTED)
+        then:
+        factory.toUriString(cid) == EXPECTED
+
+        where:
+        _ | EXPECTED
+        _ | 'cid://123'
+        _ | 'cid://123/a/b/c'
+    }
+}
diff --git a/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy b/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy index c867304d70..4233f744b1 100644 --- a/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/util/HistoryFileTest.groovy @@ -32,10 +32,10 @@ class HistoryFileTest extends Specification { b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sample.fa b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sample.fa -resume 58d8dd16-ce77-4507-ba1a-ec1ccc9bd2e8\tnextflow run examples/basic.nf --in data/sample.fa -2016-07-24
16:43:16\t-\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +2016-07-24 16:43:16\t-\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' def 'should support custom base dir' () { @@ -66,9 +66,9 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa def d1 = new Date(now - 50_000) def d2 = new Date(now - 30_000) def d3 = new Date(now - 10_000) - history.write( 'hello_world', id1, 'abc', '-', [1,2,3], d1 ) - history.write( 'super_star', id2, '123', '-', [1,2,3], d2 ) - history.write( 'slow_food', id3, 'xyz', '-', [1,2,3], d3 ) + history.write( 'hello_world', id1, 'abc', [1,2,3], d1 ) + history.write( 'super_star', id2, '123', [1,2,3], d2 ) + history.write( 'slow_food', id3, 'xyz', [1,2,3], d3 ) then: history.getLast() == new HistoryRecord(sessionId: id3, runName: 'slow_food', timestamp: d3, command: '1 2 3') @@ -243,9 +243,9 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa then: history.text == ''' 58d8dd16-ce77-4507-ba1a-ec1ccc9bd2e8\tnextflow run examples/basic.nf --in data/sample.fa - 2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello - 2016-07-24 
16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume - 2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello + 2016-07-24 16:43:34\t-\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello + 2016-07-24 16:43:34\t-\tsmall_cirum\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume + 2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' .stripIndent() } @@ -306,38 +306,14 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.findAllRunNames() == ['evil_pike', 'gigantic_keller', 'small_cirum', 'modest_bartik'] as Set } - def 'should update cid hash ' () { - given: - def source = ''' -2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello -''' - def file = Files.createTempFile('test',null) - file.deleteOnExit() - file.text = source - def history = new HistoryFile(file) - - - when: - history.updateCidHash('evil_pike','cid_hash') - then: - history.text == ''' -2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\tcid_hash\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 
min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello -''' - } def 'should update the history entries ' () { given: def source = ''' -2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +2016-07-24 16:43:16\t-\tevil_pike\t-\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' def file = Files.createTempFile('test',null) file.deleteOnExit() @@ -350,10 +326,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('evil_pike',true,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello +2016-07-24 
16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello +2016-07-24 16:43:34\t-\tsmall_cirum\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' when: @@ -361,10 +337,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('small_cirum',false,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello +2016-07-24 16:43:34\t-\tgigantic_keller\t-\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello +2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' when: @@ -372,10 +348,10 @@ b8a3c4cf-17e4-49c6-a4cf-4fd8ddbeef98\tnextflow run examples/ampa.nf --in data/sa history.update('gigantic_keller',true,when) then: history.text == ''' -2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t-\t.nextflow run hello -2016-07-24 16:43:34\t16s\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run hello -2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t-\t.nextflow run 
hello -resume -2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t-\t.nextflow run hello +2016-07-24 16:43:16\t10m\tevil_pike\tOK\t6b9515aba6\te710da1b-ce06-482f-bbcf-987a507f85d1\t.nextflow run hello +2016-07-24 16:43:34\t16s\tgigantic_keller\tOK\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello +2016-07-24 16:43:34\t1h\tsmall_cirum\tERR\t6b9515aba6\t5a6d3877-8823-4ed6-b7fe-2b6748ed4ff9\t.nextflow run hello -resume +2016-07-25 09:58:01\t5 min\tmodest_bartik\tERR\t6b9515aba6\t5910a50f-8656-4765-aa79-f07cef912062\t.nextflow run hello ''' } diff --git a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy index 430222ade5..be885ed40e 100644 --- a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy +++ b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy @@ -1172,6 +1172,8 @@ class FileHelper { } public static HashCode getTaskHashFromPath(Path sourcePath, Path workPath) { + assert sourcePath + assert workPath if (sourcePath.startsWith(workPath)) { Path relativePath = workPath.relativize(sourcePath) if (relativePath.getNameCount() >= 2) { From edfaf5bc749c561245cca1047e8fe873c8325115 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 27 Feb 2025 15:46:03 +0100 Subject: [PATCH 15/72] fix NPE in tests Signed-off-by: jorgee --- .../main/groovy/nextflow/data/cid/CidObserver.groovy | 12 ++++++++---- .../data/cid/fs/CidFileSystemProvider.groovy | 4 +++- .../groovy/nextflow/data/cid/CidObserverTest.groovy | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index fe33c95032..4a52b2ee75 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -104,13 +104,17 @@ class 
CidObserver implements TraceObserver { normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()), session.workflowMetadata.scriptId ) + List otherScripts = new LinkedList<>() + for (Path p: ScriptMeta.allScriptNames().values()) { + if (p && p != session.workflowMetadata.scriptFile) { + otherScripts.add(new DataPath(normalizer.normalizePath(p.normalize()), + CacheHelper.hasher(p.text).hash().toString())) + } + } final workflow = new Workflow( DataType.Workflow, mainScript, - ScriptMeta.allScriptNames().values().collect { new DataPath( - normalizer.normalizePath(it.normalize()), - CacheHelper.hasher(it.text).hash().toString()) - }, + otherScripts, session.workflowMetadata.repository, session.workflowMetadata.commitId ) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy index a963c7dd61..d8a68f65c3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -74,7 +74,9 @@ class CidFileSystemProvider extends FileSystemProvider { if( !fileSystem ) { //Overwrite default values with provided configuration final defaultConfig = DataConfig.asMap() - config.each {defaultConfig.put(it.key, it.value)} + if (config) { + config.forEach {String key,value -> defaultConfig.put(key, value) } + } fileSystem = new CidFileSystem(this, new DataConfig(defaultConfig)) } return fileSystem diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index fee5957b76..3167350cca 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -76,6 +76,7 @@ class CidObserverTest extends Specification { '"sessionId": "' 
+ uniqueId + '",' + '"name": "test_run", "params": []}' when: + observer.onFlowCreate(session) observer.onFlowBegin() then: folder.resolve(".meta/${observer.executionHash}/.data.json").text == JsonOutput.prettyPrint(expectedString) From c207d9264876fc9cc60369ccb7bb3b7b5f9dc438 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 27 Feb 2025 19:51:47 +0100 Subject: [PATCH 16/72] Fix NPE in tests Signed-off-by: jorgee --- .../nextflow/data/cid/fs/CidFileSystemProvider.groovy | 4 +++- .../src/main/groovy/nextflow/data/config/DataConfig.groovy | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy index d8a68f65c3..91f24984ec 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -75,7 +75,9 @@ class CidFileSystemProvider extends FileSystemProvider { //Overwrite default values with provided configuration final defaultConfig = DataConfig.asMap() if (config) { - config.forEach {String key,value -> defaultConfig.put(key, value) } + for (Map.Entry e : config.entrySet()) { + defaultConfig.put(e.key, e.value) + } } fileSystem = new CidFileSystem(this, new DataConfig(defaultConfig)) } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy index 64564b3e96..467598218e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy @@ -32,16 +32,16 @@ class DataConfig { final DataStoreOpts store DataConfig(Map opts) { - this.store = new DataStoreOpts(opts.store as Map ?: [:]) + this.store = new DataStoreOpts(opts.store as Map ?: Map.of()) } static Map asMap() { - 
session ? (Map)session.config.navigate('workflow.data') : [:] + session?.config?.navigate('workflow.data') as Map ?: new HashMap() } static DataConfig create(Session session) { if( session ) { - return new DataConfig(session.config.navigate('workflow.data') as Map ?: [:]) + return new DataConfig( session.config.navigate('workflow.data') as Map ?: Map.of()) } else throw new IllegalStateException("Missing Nextflow session") From f4b90318a38feb13f8cc3c70002bcf45931e8212 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 27 Feb 2025 20:35:58 +0100 Subject: [PATCH 17/72] Add CidStore factory Signed-off-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 9 +--- .../nextflow/data/cid/CidStoreFactory.groovy | 49 +++++++++++++++++++ .../data/cid/DefaultCidStoreFactory.groovy | 38 ++++++++++++++ .../main/resources/META-INF/extensions.idx | 1 + 4 files changed, 90 insertions(+), 7 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index a7e7f79565..9e5352520b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -17,6 +17,7 @@ package nextflow import nextflow.data.cid.CidStore +import nextflow.data.cid.CidStoreFactory import nextflow.data.cid.DefaultCidStore import nextflow.data.config.DataConfig @@ -407,17 +408,11 @@ class Session implements ISession { if(config.navigate('workflow.data')) { this.cidEnabled = true - this.cidStore = createCidStore(this) + this.cidStore = CidStoreFactory.create(DataConfig.create(this)) } } - protected static CidStore createCidStore(Session session){ - final store = new DefaultCidStore() - store.open(DataConfig.create(session)) - return store - } - protected Path 
cloudCachePath(Map cloudcache, Path workDir) { if( !cloudcache?.enabled ) return null diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy new file mode 100644 index 0000000000..f27e3f7602 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy @@ -0,0 +1,49 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.data.config.DataConfig +import nextflow.plugin.Plugins +import org.pf4j.ExtensionPoint + +/** + * Factory for CidStore + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +abstract class CidStoreFactory implements ExtensionPoint { + + protected abstract CidStore newInstance(DataConfig config) + + static CidStore create(DataConfig config){ + final all = Plugins.getPriorityExtensions(CidStoreFactory) + if( !all ) + throw new IllegalStateException("Unable to find Nextflow CID store factory") + final factory = all.first() + log.debug "Using Nextflow CID store factory: ${factory.getClass().getName()}" + return factory.newInstance(config) + + + } + + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy new file mode 100644 index 0000000000..df8e9243a4 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy @@ -0,0 +1,38 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package nextflow.data.cid + +import groovy.transform.CompileStatic +import nextflow.data.config.DataConfig +import nextflow.plugin.Priority + +/** + * Default Factory for CidStore + * + * @author Jorge Ejarque + */ +@CompileStatic +@Priority(0) +class DefaultCidStoreFactory extends CidStoreFactory{ + + @Override + protected CidStore newInstance(DataConfig config) { + final cidStore = new DefaultCidStore() + cidStore.open(config) + return cidStore + } +} diff --git a/modules/nextflow/src/main/resources/META-INF/extensions.idx b/modules/nextflow/src/main/resources/META-INF/extensions.idx index 7fb037c37d..e7ba19b1ab 100644 --- a/modules/nextflow/src/main/resources/META-INF/extensions.idx +++ b/modules/nextflow/src/main/resources/META-INF/extensions.idx @@ -25,4 +25,5 @@ nextflow.mail.SimpleMailProvider nextflow.mail.JavaMailProvider nextflow.processor.tip.DefaultTaskTipProvider nextflow.fusion.FusionTokenDefault +nextflow.data.cid.DefaultCidStoreFactory From b89cdf147f5ba12807c3c655e635c64533813e59 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 27 Feb 2025 20:49:54 +0100 Subject: [PATCH 18/72] fix cid paht hash validation Signed-off-by: jorgee --- .../main/groovy/nextflow/data/cid/fs/CidPath.groovy | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy index 4d70f5252c..98dadb65ec 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy @@ -80,6 +80,15 @@ class CidPath implements Path { this.filePath = filePath0(fs, storePath) } + private static void validateHash(Map cidObject) { + final hashedPath = Path.of(cidObject.path as String) + if( !hashedPath.exists() ) + throw new FileNotFoundException("Target path $cidObject.path does not exists.") + if( cidObject.checksum && 
CacheHelper.hasher(hashedPath).hash().toString() != cidObject.checksum ) { + log.warn("Checksum of $hashedPath does not match with the one stored in the metadata") + } + } + @TestOnly protected String getFilePath(){ this.filePath } @@ -101,12 +110,10 @@ class CidPath implements Path { final type = DataType.valueOf(cidObject.type as String) if( type == DataType.TaskOutput || type == DataType.WorkflowOutput ) { // return the real path stored in the metadata + validateHash(cidObject) final realPath = Path.of(cidObject.path as String, childs) if( !realPath.exists() ) throw new FileNotFoundException("Target path $realPath for $cidStorePath does not exists.") - if( cidObject.checksum && CacheHelper.hasher(realPath).hash().toString() != cidObject.checksum ) { - log.warn("Checksum of $cidStorePath does not match with the one stored in the metadata") - } return realPath } } else { From 34cc0b19beb00ff8ef3cfb1d0f57212d9ffc1975 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sat, 1 Mar 2025 16:15:11 +0100 Subject: [PATCH 19/72] Cleanup and formatting Signed-off-by: Paolo Di Tommaso --- .../nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy | 7 +------ .../main/groovy/nextflow/data/cid/CidStoreFactory.groovy | 3 --- .../main/groovy/nextflow/data/cid/DefaultCidStore.groovy | 1 - .../nextflow/data/cid/DefaultCidStoreFactory.groovy | 1 + .../groovy/nextflow/data/cid/fs/CidFileSystem.groovy | 4 ++-- .../nextflow/data/cid/fs/CidFileSystemProvider.groovy | 8 ++++---- .../src/main/groovy/nextflow/data/cid/fs/CidPath.groovy | 9 +++------ 7 files changed, 11 insertions(+), 22 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index 3b17ca35b8..53476e0277 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -58,8 +58,6 @@ class CmdCid extends CmdBase { commands << new CmdLog() commands << new 
CmdShow() commands << new CmdLineage() - - } @Parameter(hidden = true) @@ -115,12 +113,9 @@ class CmdCid extends CmdBase { .build() final session = new Session(config) printHistory(session.cidStore) - } private void printHistory(CidStore store) { - - final historyFile = store.getHistoryFile() if (historyFile.exists()) { def table = new TableBuilder(cellSeparator: '\t') @@ -140,6 +135,7 @@ class CmdCid extends CmdBase { println 'Usage: nextflow cid log' } } + class CmdShow implements SubCmd{ @Override @@ -176,7 +172,6 @@ class CmdCid extends CmdBase { } } - class CmdLineage implements SubCmd { @Canonical diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy index f27e3f7602..b48a0fa963 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy @@ -41,9 +41,6 @@ abstract class CidStoreFactory implements ExtensionPoint { final factory = all.first() log.debug "Using Nextflow CID store factory: ${factory.getClass().getName()}" return factory.newInstance(config) - - } - } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy index 9f35052861..65096cb5de 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -78,5 +78,4 @@ class DefaultCidStore implements CidStore { static Path getMetadataPath(DataConfig config){ config.store.location.resolve('.meta') } - } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy index df8e9243a4..c88fdb0459 100644 --- 
a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy @@ -35,4 +35,5 @@ class DefaultCidStoreFactory extends CidStoreFactory{ cidStore.open(config) return cidStore } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy index d6105624f7..683c912b25 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy @@ -43,8 +43,8 @@ class CidFileSystem extends FileSystem { private Path basePath /* - * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 - */ + * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 + */ protected CidFileSystem(){} CidFileSystem(CidFileSystemProvider provider, DataConfig config) { diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy index 91f24984ec..2f27d2e20b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -136,7 +136,7 @@ class CidFileSystemProvider extends FileSystemProvider { @Override int read(ByteBuffer dst) throws IOException { - channel.read(dst) + return channel.read(dst) } @Override @@ -146,7 +146,7 @@ class CidFileSystemProvider extends FileSystemProvider { @Override long position() throws IOException { - channel.position() + return channel.position() } @Override @@ -156,7 +156,7 @@ class CidFileSystemProvider extends FileSystemProvider { @Override long size() throws IOException { - channel.size() + return channel.size() } @Override @@ 
-166,7 +166,7 @@ class CidFileSystemProvider extends FileSystemProvider { @Override boolean isOpen() { - channel.isOpen() + return channel.isOpen() } @Override diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy index 98dadb65ec..43919df25e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy @@ -45,11 +45,11 @@ import nextflow.file.FileHelper @CompileStatic class CidPath implements Path { - static public String SEPARATOR = '/' + static public final String SEPARATOR = '/' public static final String METADATA_FILE = '.data.json' - public static final String CID_PROT = "${SCHEME}://".toString() + public static final String CID_PROT = "${SCHEME}://" - static private String[] EMPTY = new String[] {} + static private final String[] EMPTY = new String[] {} private CidFileSystem fileSystem @@ -161,7 +161,6 @@ class CidPath implements Path { default: return resolve0(fs, more[0], more[1..-1] as String[]) } - } static private String norm0(String path) { @@ -298,7 +297,6 @@ class CidPath implements Path { return resolve(that) } - @Override Path relativize(Path other) { if( CidPath.class != other.class ) { @@ -384,5 +382,4 @@ class CidPath implements Path { filePath } - } From bd96bc83cf8b58a9286004279fb3c279c949445a Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Mon, 3 Mar 2025 19:54:22 +0100 Subject: [PATCH 20/72] Decouple cid store from session (#5833) [ci fast] Signed-off-by: Paolo Di Tommaso Signed-off-by: jorgee Co-authored-by: Jorge Ejarque Co-authored-by: jorgee --- .../src/main/groovy/nextflow/Session.groovy | 18 --------- .../main/groovy/nextflow/cli/CmdCid.groovy | 26 +++++++----- .../nextflow/data/cid/CidObserver.groovy | 4 +- .../nextflow/data/cid/CidStoreFactory.groovy | 30 +++++++++++++- .../nextflow/data/config/DataConfig.groovy | 4 ++ 
.../trace/DefaultObserverFactory.groovy | 9 +++-- .../groovy/nextflow/cli/CmdCidTest.groovy | 39 +++++++++--------- .../nextflow/data/cid/CidObserverTest.groovy | 40 ++++++++----------- .../data/config/DataConfigTest.groovy | 12 +++++- 9 files changed, 105 insertions(+), 77 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 9e5352520b..98c7017d73 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -16,11 +16,6 @@ package nextflow -import nextflow.data.cid.CidStore -import nextflow.data.cid.CidStoreFactory -import nextflow.data.cid.DefaultCidStore -import nextflow.data.config.DataConfig - import java.nio.file.Files import java.nio.file.Path import java.nio.file.Paths @@ -259,14 +254,6 @@ class Session implements ISession { private boolean statsEnabled - private volatile boolean cidEnabled - - boolean getCidEnabled() { cidEnabled } - - private CidStore cidStore - - CidStore getCidStore() { cidStore } - private WorkflowMetadata workflowMetadata private WorkflowStatsObserver statsObserver @@ -406,11 +393,6 @@ class Session implements ISession { // -- file porter config this.filePorter = new FilePorter(this) - if(config.navigate('workflow.data')) { - this.cidEnabled = true - this.cidStore = CidStoreFactory.create(DataConfig.create(this)) - } - } protected Path cloudCachePath(Map cloudcache, Path workDir) { diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index 53476e0277..bdf5943806 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -26,6 +26,7 @@ import nextflow.config.ConfigBuilder import nextflow.dag.MermaidHtmlRenderer import nextflow.data.cid.CidHistoryFile import nextflow.data.cid.CidStore +import 
nextflow.data.cid.CidStoreFactory import nextflow.data.cid.model.DataType import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins @@ -112,7 +113,12 @@ class CmdCid extends CmdBase { .setBaseDir(Paths.get('.')) .build() final session = new Session(config) - printHistory(session.cidStore) + final store = CidStoreFactory.getOrCreate(session) + if (store) { + printHistory(store) + } else { + println "Error CID store not loaded. Check Nextflow configuration." + } } private void printHistory(CidStore store) { @@ -157,12 +163,15 @@ class CmdCid extends CmdBase { .setOptions(getLauncher().getOptions()) .setBaseDir(Paths.get('.')) .build() - final session = new Session(config) - final store = session.cidStore - try { - println store.load(key).toString() - }catch (Throwable e){ - println "Error loading ${args[0]}." + final store = CidStoreFactory.getOrCreate(new Session(config)) + if (store) { + try { + println store.load(key).toString() + } catch (Throwable e) { + println "Error loading ${args[0]}." + } + } else { + println "Error CID store not loaded. Check Nextflow configuration." 
} } @@ -196,8 +205,7 @@ class CmdCid extends CmdBase { .setOptions(getLauncher().getOptions()) .setBaseDir(Paths.get('.')) .build() - final session = new Session(config) - final store = session.cidStore + final store = CidStoreFactory.getOrCreate(new Session(config)) final template = readTemplate() final network = getLineage(store, args[0]) Path file = Path.of(args[1]) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index 4a52b2ee75..578678fbe4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -65,9 +65,9 @@ class CidObserver implements TraceObserver { private WorkflowResults workflowResults private Map outputsStoreDirCid = new HashMap(10) - CidObserver(Session session){ + CidObserver(Session session, CidStore store){ this.session = session - this.store = session.cidStore + this.store = store } @Override diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy index b48a0fa963..1592f36928 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy @@ -19,8 +19,10 @@ package nextflow.data.cid import groovy.transform.CompileStatic import groovy.util.logging.Slf4j +import nextflow.Session import nextflow.data.config.DataConfig import nextflow.plugin.Plugins +import nextflow.util.TestOnly import org.pf4j.ExtensionPoint /** @@ -32,9 +34,13 @@ import org.pf4j.ExtensionPoint @CompileStatic abstract class CidStoreFactory implements ExtensionPoint { + private static CidStore instance + + private static boolean initialized + protected abstract CidStore newInstance(DataConfig config) - static CidStore create(DataConfig config){ + private static 
CidStore create(DataConfig config){ final all = Plugins.getPriorityExtensions(CidStoreFactory) if( !all ) throw new IllegalStateException("Unable to find Nextflow CID store factory") @@ -43,4 +49,26 @@ abstract class CidStoreFactory implements ExtensionPoint { return factory.newInstance(config) } + static CidStore getOrCreate(Session session) { + if( instance || initialized ) + return instance + synchronized (CidStoreFactory.class) { + if( instance || initialized ) + return instance + initialized = true + final config = DataConfig.create(session) + if( !config.enabled ) + return null + return instance = create(config) + } + } + + @TestOnly + static void reset(){ + synchronized (CidStoreFactory.class) { + instance = null + initialized = false + } + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy index 467598218e..7b5f0687dd 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy @@ -20,6 +20,7 @@ package nextflow.data.config import groovy.transform.CompileStatic import nextflow.Global import nextflow.Session +import nextflow.util.TestOnly /** * Model workflow data config @@ -31,8 +32,11 @@ class DataConfig { final DataStoreOpts store + final boolean enabled + DataConfig(Map opts) { this.store = new DataStoreOpts(opts.store as Map ?: Map.of()) + this.enabled = opts.enabled as boolean ?: false } static Map asMap() { diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index dd57c4168d..ce1782ffc4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -4,6 +4,7 @@ import java.nio.file.Path import nextflow.Session 
import nextflow.data.cid.CidObserver +import nextflow.data.cid.CidStoreFactory /** * Creates Nextflow observes object @@ -26,14 +27,14 @@ class DefaultObserverFactory implements TraceObserverFactory { createTimelineObserver(result) createDagObserver(result) createAnsiLogObserver(result) - if( session.cidEnabled ){ - createCidObserver(result) - } + createCidObserver(result) return result } protected void createCidObserver(Collection result) { - result.add( new CidObserver(this.session) ) + final store = CidStoreFactory.getOrCreate(session) + if( store ) + result.add( new CidObserver(this.session, store) ) } protected void createAnsiLogObserver(Collection result) { diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy index 774a5cd63a..d833a34e9c 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -17,6 +17,7 @@ package nextflow.cli import groovy.json.JsonOutput +import nextflow.data.cid.CidStoreFactory import java.nio.file.Files @@ -27,6 +28,7 @@ import org.junit.Rule import spock.lang.Specification import test.OutputCapture + /** * CLI cid Tests * @@ -36,7 +38,13 @@ class CmdCidTest extends Specification { def cleanup() { Plugins.stop() + CidStoreFactory.reset() + } + + def setupSpec() { + CidStoreFactory.reset() } + /* * Read more http://mrhaki.blogspot.com.es/2015/02/spocklight-capture-and-assert-system.html */ @@ -45,9 +53,9 @@ class CmdCidTest extends Specification { def 'should print executions cids' (){ given: - def folder = Files.createTempDirectory('test') + def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.data.store.location = '$folder'".toString() + configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() def historyFile = 
folder.resolve(".meta/.history") Files.createDirectories(historyFile.parent) def uniqueId = UUID.randomUUID() @@ -71,15 +79,15 @@ class CmdCidTest extends Specification { stdout.size() == 2 stdout[1] == recordEntry - cleanup: - folder?.deleteDir() + //cleanup: + //folder?.deleteDir() } def 'should print no history' (){ given: - def folder = Files.createTempDirectory('test') + def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.data.store.location = '$folder'".toString() + configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() def historyFile = folder.resolve(".meta/.history") Files.createDirectories(historyFile.parent) def launcher = Mock(Launcher){ @@ -105,9 +113,9 @@ class CmdCidTest extends Specification { def 'should show cid content' (){ given: - def folder = Files.createTempDirectory('test') + def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.data.store.location = '$folder'".toString() + configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() def cidFile = folder.resolve(".meta/12345/.data.json") Files.createDirectories(cidFile.parent) def launcher = Mock(Launcher){ @@ -143,9 +151,9 @@ class CmdCidTest extends Specification { def 'should warn if no cid content' (){ given: - def folder = Files.createTempDirectory('test') + def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.data.store.location = '$folder'".toString() + configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } @@ -170,10 +178,10 @@ class CmdCidTest extends Specification { def 'should get 
lineage cid content' (){ given: - def folder = Files.createTempDirectory('test') + def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') def outputHtml = folder.resolve('lineage.html') - configFile.text = "workflow.data.store.location = '$folder'".toString() + configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } @@ -245,14 +253,9 @@ class CmdCidTest extends Specification { outputHtml.exists() outputHtml.text == expectedOutput - cleanup: folder?.deleteDir() - } - - - - + } } diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index 3167350cca..3d678638a7 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -47,7 +47,7 @@ class CidObserverTest extends Specification { def 'should save workflow' (){ given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[store:[location:folder.toString()]]]] + def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] def store = new DefaultCidStore(); def uniqueId = UUID.randomUUID() def scriptFile = folder.resolve("main.nf") @@ -61,14 +61,13 @@ class CidObserverTest extends Specification { } def session = Mock(Session) { getConfig() >> config - getCidStore() >> store getUniqueId() >> uniqueId getRunName() >> "test_run" getWorkflowMetadata() >> metadata getParams() >> new ScriptBinding.ParamsMap() } store.open(DataConfig.create(session)) - def observer = new CidObserver(session) + def observer = new CidObserver(session, store) def expectedString = '{"type":"WorkflowRun","workflow":{"type": "Workflow",' + '"mainScriptFile":{"path":"file://' + 
scriptFile.toString() + '", "checksum": "78910"},' + '"otherScriptFiles": [], "repository": "https://nextflow.io/nf-test/",' + @@ -88,17 +87,16 @@ class CidObserverTest extends Specification { def 'should save task run' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[store:[location:folder.toString()]]]] + def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] def store = new DefaultCidStore(); def uniqueId = UUID.randomUUID() def session = Mock(Session) { getConfig()>>config - getCidStore()>>store getUniqueId()>>uniqueId getRunName()>>"test_run" } store.open(DataConfig.create(session)) - def observer = new CidObserver(session) + def observer = new CidObserver(session, store) and: def hash = HashCode.fromInt(123456789) and: @@ -136,14 +134,13 @@ class CidObserverTest extends Specification { def 'should save task output' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[store:[location:folder.toString()]]]] + def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] def store = new DefaultCidStore(); def session = Mock(Session) { getConfig()>>config - getCidStore()>>store } store.open(DataConfig.create(session)) - def observer = Spy(new CidObserver(session)) + def observer = Spy(new CidObserver(session, store)) and: def workDir = folder.resolve('12/34567890') Files.createDirectories(workDir) @@ -187,11 +184,10 @@ class CidObserverTest extends Specification { def 'should relativise task output dirs' (){ when: - def config = [workflow:[data:[store:[location:'cid']]]] + def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] def store = new DefaultCidStore(); def session = Mock(Session) { getConfig()>>config - getCidStore()>>store } def hash = HashCode.fromInt(123456789) def taskConfig = Mock(TaskConfig){ @@ -205,7 +201,7 @@ class CidObserverTest extends Specification { getConfig() >> taskConfig } 
store.open(DataConfig.create(session)) - def observer = new CidObserver(session) + def observer = new CidObserver(session, store) then: observer.getTaskRelative(task, PATH) == EXPECTED where: @@ -220,11 +216,10 @@ class CidObserverTest extends Specification { def 'should return exception when relativize task output dirs' (){ when: - def config = [workflow:[data:[store:[location:'cid']]]] + def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] def store = new DefaultCidStore(); def session = Mock(Session) { getConfig()>>config - getCidStore()>>store } def hash = HashCode.fromInt(123456789) def taskConfig = Mock(TaskConfig){ @@ -238,7 +233,7 @@ class CidObserverTest extends Specification { getConfig() >> taskConfig } store.open(DataConfig.create(session)) - def observer = new CidObserver(session) + def observer = new CidObserver(session, store) observer.getTaskRelative(task, PATH) then: def e = thrown(Exception) @@ -252,15 +247,14 @@ class CidObserverTest extends Specification { def 'should relativise workflow output dirs' (){ when: - def config = [workflow:[data:[store:[location:'cid']]]] + def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] def store = new DefaultCidStore(); def session = Mock(Session) { getOutputDir()>>OUTPUT_DIR getConfig()>>config - getCidStore()>>store } store.open(DataConfig.create(session)) - def observer = new CidObserver(session) + def observer = new CidObserver(session, store) then: observer.getWorkflowRelative(PATH) == EXPECTED where: @@ -275,14 +269,13 @@ class CidObserverTest extends Specification { def 'should return exception when relativise workflow output dirs' (){ when: - def config = [workflow:[data:[store:[location:'cid']]]] + def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] def store = new DefaultCidStore(); def session = Mock(Session) { getOutputDir()>>OUTPUT_DIR getConfig()>>config - getCidStore()>>store } - def observer = new CidObserver(session) + def observer = new 
CidObserver(session, store) observer.getWorkflowRelative(PATH) then: def e = thrown(Exception) @@ -298,7 +291,7 @@ class CidObserverTest extends Specification { def 'should save workflow output' (){ given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[store:[location:folder.toString()]]]] + def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] def store = new DefaultCidStore(); def outputDir = folder.resolve('results') def uniqueId = UUID.randomUUID() @@ -314,7 +307,6 @@ class CidObserverTest extends Specification { } def session = Mock(Session) { getConfig()>>config - getCidStore()>>store getOutputDir()>>outputDir getWorkDir() >> workDir getWorkflowMetadata()>>metadata @@ -323,7 +315,7 @@ class CidObserverTest extends Specification { getParams() >> new ScriptBinding.ParamsMap() } store.open(DataConfig.create(session)) - def observer = new CidObserver(session) + def observer = new CidObserver(session, store) when: 'Starting workflow' observer.onFlowCreate(session) diff --git a/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy index 3c7ac18489..c604c5b86d 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy @@ -31,12 +31,22 @@ class DataConfigTest extends Specification { def config = new DataConfig(Map.of()) then: config.store.location == Path.of('.').resolve('data').toAbsolutePath().normalize() + !config.enabled + } + + def 'should create default with enable' () { + when: + def config = new DataConfig([enabled: true]) + then: + config.store.location == Path.of('.').resolve('data').toAbsolutePath().normalize() + config.enabled } def 'should create data config' () { when: - def config = new DataConfig(store: [location: "/some/data/store"]) + def config = new DataConfig(enabled: true, 
store: [location: "/some/data/store"]) then: config.store.location == Path.of("/some/data/store") + config.enabled } } From 4c0ef8f5401fcc0313c8ffcba2d02d78d53615b4 Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 4 Mar 2025 11:23:45 +0100 Subject: [PATCH 21/72] Add cid command help Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdCid.groovy | 64 +++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index bdf5943806..e22af660cf 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -18,6 +18,7 @@ package nextflow.cli import com.beust.jcommander.Parameter +import com.beust.jcommander.Parameters import groovy.json.JsonSlurper import groovy.transform.Canonical import groovy.transform.CompileStatic @@ -43,12 +44,14 @@ import static nextflow.data.cid.fs.CidPath.METADATA_FILE * @author Paolo Di Tommaso */ @CompileStatic -class CmdCid extends CmdBase { +@Parameters(commandDescription = "Explore workflows CID metadata.") +class CmdCid extends CmdBase implements UsageAware{ private static final String NAME = 'cid' interface SubCmd { String getName() + String getDescription() void apply(List args) void usage() } @@ -80,6 +83,41 @@ class CmdCid extends CmdBase { getCmd(args).apply(args.drop(1)) } + /** + * Print the command usage help + */ + void usage() { + usage(args) + } + + /** + * Print the command usage help + * + * @param args The arguments as entered by the user + */ + void usage(List args) { + if( !args ) { + List result = [] + result << this.getClass().getAnnotation(Parameters).commandDescription() + result << "Usage: nextflow $NAME [options]".toString() + result << '' + result << 'Commands:' + int len = 0 + commands.forEach {len = it.name.size() > len ? 
it.name.size() : len } + commands.sort(){it.name}.each { result << " ${it.name.padRight(len)}\t${it.description}".toString() } + result << '' + println result.join('\n').toString() + } + else { + def sub = commands.find { it.name == args[0] } + if( sub ) + sub.usage() + else { + throw new AbortOperationException("Unknown $NAME sub-command: ${args[0]}") + } + } + } + protected SubCmd getCmd(List args) { def cmd = commands.find { it.name == args[0] } @@ -101,6 +139,11 @@ class CmdCid extends CmdBase { return 'log' } + @Override + String getDescription() { + return 'Print the CID execution log' + } + @Override void apply(List args) { if (args.size() != 0) { @@ -138,7 +181,8 @@ class CmdCid extends CmdBase { @Override void usage() { - println 'Usage: nextflow cid log' + println description + println "Usage: nextflow $NAME $name" } } @@ -149,6 +193,11 @@ class CmdCid extends CmdBase { return 'show' } + @Override + String getDescription() { + return 'Print the description of a CID reference' + } + @Override void apply(List args) { if (args.size() != 1) { @@ -177,7 +226,8 @@ class CmdCid extends CmdBase { @Override void usage() { - println 'Usage: nextflow cid show ' + println description + println "Usage: nextflow $NAME $name " } } @@ -193,6 +243,11 @@ class CmdCid extends CmdBase { @Override String getName() { 'lineage' } + @Override + String getDescription() { + return 'Render a lineage graph for a workflow output' + } + @Override void apply(List args) { if (args.size() != 2) { @@ -325,7 +380,8 @@ class CmdCid extends CmdBase { @Override void usage() { - println 'Usage: nextflow cid lineage ' + println description + println "Usage: nextflow $NAME $name " } } From b9de3e6e0cda7ab582393fb6821120f44b20505f Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 4 Mar 2025 13:16:51 +0100 Subject: [PATCH 22/72] Fix CID store errors when workflow outputs in s3 Signed-off-by: jorgee --- .../src/main/groovy/nextflow/data/cid/CidObserver.groovy | 6 +++--- 
.../src/main/groovy/nextflow/data/cid/fs/CidPath.groovy | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index 578678fbe4..bdeb1dd393 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -202,7 +202,7 @@ class CidObserver implements TraceObserver { final hash = CacheHelper.hasher(path).hash().toString() final value = new Output( DataType.TaskOutput, - path.toString(), + path.toUriString(), hash, "$CID_PROT$task.hash", attrs.size(), @@ -259,7 +259,7 @@ class CidObserver implements TraceObserver { final attrs = readAttributes(destination) final value = new Output( DataType.WorkflowOutput, - destination.toString(), + destination.toUriString(), hash, sourceReference, attrs.size(), @@ -294,7 +294,7 @@ class CidObserver implements TraceObserver { final attrs = readAttributes(destination) final value = new Output( DataType.WorkflowOutput, - destination.toString(), + destination.toUriString(), hash, "${CID_PROT}${executionHash}".toString(), attrs.size(), diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy index 43919df25e..2bb6e1c086 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy @@ -81,7 +81,7 @@ class CidPath implements Path { } private static void validateHash(Map cidObject) { - final hashedPath = Path.of(cidObject.path as String) + final hashedPath = FileHelper.toCanonicalPath(cidObject.path as String) if( !hashedPath.exists() ) throw new FileNotFoundException("Target path $cidObject.path does not exists.") if( cidObject.checksum && CacheHelper.hasher(hashedPath).hash().toString() != 
cidObject.checksum ) { @@ -111,7 +111,9 @@ class CidPath implements Path { if( type == DataType.TaskOutput || type == DataType.WorkflowOutput ) { // return the real path stored in the metadata validateHash(cidObject) - final realPath = Path.of(cidObject.path as String, childs) + def realPath = FileHelper.toCanonicalPath(cidObject.path as String) + if (childs && childs.size() > 0) + realPath = realPath.resolve(childs.join(SEPARATOR)) if( !realPath.exists() ) throw new FileNotFoundException("Target path $realPath for $cidStorePath does not exists.") return realPath @@ -212,7 +214,7 @@ class CidPath implements Path { if( c>1 ) return subpath(0,c-1) if( c==1 ) - return new CidPath(fileSystem,"/") + return new CidPath(fileSystem,SEPARATOR) return null } From 063a0ec8509dd868ffec7dea5be5b0e9873bcf63 Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Tue, 11 Mar 2025 17:45:37 +0100 Subject: [PATCH 23/72] Decouple CID FileSystem from Local file system and other fixes (#5866) Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdCid.groovy | 37 ++-- .../nextflow/data/cid/CidHistoryFile.groovy | 179 ++++++++++++------ .../nextflow/data/cid/CidHistoryLog.groovy | 65 +++++++ .../nextflow/data/cid/CidHistoryRecord.groovy | 80 ++++++++ .../nextflow/data/cid/CidObserver.groovy | 77 +++++--- .../groovy/nextflow/data/cid/CidStore.groovy | 29 ++- .../nextflow/data/cid/CidStoreFactory.groovy | 2 +- .../nextflow/data/cid/DefaultCidStore.groovy | 35 ++-- .../nextflow/data/cid/fs/CidFileSystem.groovy | 15 +- .../data/cid/fs/CidFileSystemProvider.groovy | 13 +- .../nextflow/data/cid/fs/CidPath.groovy | 122 ++++++------ .../nextflow/data/cid/model/Checksum.groovy | 34 ++++ .../nextflow/data/cid/model/DataPath.groovy | 2 +- .../nextflow/data/cid/model/Output.groovy | 2 +- .../nextflow/data/cid/model/TaskRun.groovy | 2 +- .../nextflow/data/config/DataStoreOpts.groovy | 6 +- .../nextflow/processor/TaskProcessor.groovy | 4 + .../groovy/nextflow/util/HistoryFile.groovy | 47 ++++- 
.../groovy/nextflow/util/WithLockFile.groovy | 78 -------- .../groovy/nextflow/cli/CmdCidTest.groovy | 11 +- .../data/cid/CidHistoryFileTest.groovy | 102 +++++----- .../data/cid/CidHistoryRecordTest.groovy | 63 ++++++ .../nextflow/data/cid/CidObserverTest.groovy | 19 +- .../data/cid/DefaultCidStoreTest.groovy | 97 ++++++++++ .../cid/fs/CidFileSystemProviderTest.groovy | 64 +++---- .../nextflow/data/cid/fs/CidPathTest.groovy | 159 +++++++++++----- .../data/cid/fs/CifPathFactoryTest.groovy | 12 +- .../data/config/DataConfigTest.groovy | 7 +- 28 files changed, 925 insertions(+), 438 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryLog.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryRecord.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/data/cid/model/Checksum.groovy delete mode 100644 modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryRecordTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/data/cid/DefaultCidStoreTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index e22af660cf..dc3f9e8abe 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -25,7 +25,7 @@ import groovy.transform.CompileStatic import nextflow.Session import nextflow.config.ConfigBuilder import nextflow.dag.MermaidHtmlRenderer -import nextflow.data.cid.CidHistoryFile +import nextflow.data.cid.CidHistoryRecord import nextflow.data.cid.CidStore import nextflow.data.cid.CidStoreFactory import nextflow.data.cid.model.DataType @@ -37,7 +37,6 @@ import java.nio.file.Path import java.nio.file.Paths import static nextflow.data.cid.fs.CidPath.CID_PROT -import static nextflow.data.cid.fs.CidPath.METADATA_FILE 
/** * @@ -165,14 +164,17 @@ class CmdCid extends CmdBase implements UsageAware{ } private void printHistory(CidStore store) { - final historyFile = store.getHistoryFile() - if (historyFile.exists()) { + final records = store.historyLog?.records + if( records ) { def table = new TableBuilder(cellSeparator: '\t') .head('TIMESTAMP') .head('RUN NAME') .head('SESSION ID') .head('RUN CID') - historyFile.eachLine { table.append(CidHistoryFile.CidRecord.parse(it).toList()) } + .head('RESULT CID') + for( CidHistoryRecord record: records ){ + table.append(record.toList()) + } println table.toString() } else { println("No workflow runs CIDs found.") @@ -207,7 +209,7 @@ class CmdCid extends CmdBase implements UsageAware{ } if (!args[0].startsWith(CID_PROT)) throw new Exception("Identifier is not a CID URL") - final key = args[0].substring(CID_PROT.size()) + "/$METADATA_FILE" + final key = args[0].substring(CID_PROT.size()) final config = new ConfigBuilder() .setOptions(getLauncher().getOptions()) .setBaseDir(Paths.get('.')) @@ -215,7 +217,11 @@ class CmdCid extends CmdBase implements UsageAware{ final store = CidStoreFactory.getOrCreate(new Session(config)) if (store) { try { - println store.load(key).toString() + final entry = store.load(key) + if( entry ) + println entry.toString() + else + println "No entry found for ${args[0]}." } catch (Throwable e) { println "Error loading ${args[0]}." 
} @@ -292,7 +298,7 @@ class CmdCid extends CmdBase implements UsageAware{ if (!nodeToRender.startsWith(CID_PROT)) throw new Exception("Identifier is not a CID URL") final slurper = new JsonSlurper() - final key = nodeToRender.substring(CID_PROT.size()) + "/$METADATA_FILE" + final key = nodeToRender.substring(CID_PROT.size()) final cidObject = slurper.parse(store.load(key).toString().toCharArray()) as Map switch (DataType.valueOf(cidObject.type as String)) { case DataType.TaskOutput: @@ -357,10 +363,17 @@ class CmdCid extends CmdBase implements UsageAware{ } if (value instanceof Map) { if (value.path) { - final label = convertToLabel(value.path.toString()) - lines << " ${value.path}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(value.path.toString(), nodeToRender)) - return + final path = value.path.toString() + if (path.startsWith(CID_PROT)) { + nodes.add(path) + edges.add(new Edge(path, nodeToRender)) + return + } else { + final label = convertToLabel(path) + lines << " ${path}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(path, nodeToRender)) + return + } } } final label = convertToLabel(value.toString()) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy index 07b4e24b16..d8217f6186 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy @@ -16,13 +16,13 @@ */ package nextflow.data.cid -import groovy.transform.EqualsAndHashCode import groovy.util.logging.Slf4j -import nextflow.util.WithLockFile +import java.nio.channels.FileChannel +import java.nio.channels.FileLock +import java.nio.file.Files import java.nio.file.Path -import java.text.DateFormat -import java.text.SimpleDateFormat +import java.nio.file.StandardOpenOption /** * File to store a history of the workflow executions and their 
corresponding CIDs @@ -30,115 +30,174 @@ import java.text.SimpleDateFormat * @author Jorge Ejarque */ @Slf4j -class CidHistoryFile extends WithLockFile { - private static final DateFormat TIMESTAMP_FMT = new SimpleDateFormat('yyyy-MM-dd HH:mm:ss') +class CidHistoryFile implements CidHistoryLog { + + Path path CidHistoryFile(Path file) { - super(file.toString()) + this.path = file } - void write(String name, UUID key, String runCid, Date date = null) { + void write(String name, UUID key, String runCid, String resultsCid, Date date = null) { assert key withFileLock { def timestamp = date ?: new Date() - log.debug("Writting record for $key in CID history file $this") - this << new CidRecord(timestamp: timestamp, runName: name, sessionId: key, runCid: runCid).toString() << '\n' + log.trace("Writting record for $key in CID history file $this") + path << new CidHistoryRecord(timestamp, name, key, runCid, resultsCid).toString() << '\n' } } - void update(UUID sessionId, String runCid) { + void updateRunCid(UUID sessionId, String runCid) { assert sessionId try { - withFileLock { update0(sessionId, runCid) } + withFileLock { updateRunCid0(sessionId, runCid) } } catch (Throwable e) { - log.warn "Can't update cid history file: $this", e + log.warn "Can't update CID history file: $this", e.message } } - String getRunCid(UUID id){ + void updateResultsCid(UUID sessionId, String resultsCid) { + assert sessionId + + try { + withFileLock { updateResultsCid0(sessionId, resultsCid) } + } + catch (Throwable e) { + log.warn "Can't update CID history file: $this", e.message + } + } + + List getRecords(){ + List list = new LinkedList() + try { + withFileLock { this.path.eachLine {list.add(CidHistoryRecord.parse(it)) } } + } + catch (Throwable e) { + log.warn "Can't read records from CID history file: $this", e.message + } + return list + } + + + CidHistoryRecord getRecord(UUID id) { assert id - for (String line: this.readLines()){ - def current = line ? 
CidRecord.parse(line) : null + for (String line : this.path.readLines()) { + def current = line ? CidHistoryRecord.parse(line) : null if (current.sessionId == id) { - return current.runCid + return current } } log.warn("Can't find session $id in CID history file $this") return null } - private void update0(UUID id, String runCid) { + + private void updateRunCid0(UUID id, String runCid) { assert id def newHistory = new StringBuilder() - this.readLines().each { line -> + this.path.readLines().each { line -> try { - def current = line ? CidRecord.parse(line) : null + def current = line ? CidHistoryRecord.parse(line) : null if (current.sessionId == id) { - log.debug("Updating record for $id in CID history file $this") - current.runCid = runCid - newHistory << current.toString() << '\n' + log.trace("Updating record for $id in CID history file $this") + final newRecord = new CidHistoryRecord(current.timestamp, current.runName, current.sessionId, runCid, current.resultsCid) + newHistory << newRecord.toString() << '\n' } else { newHistory << line << '\n' } } catch (IllegalArgumentException e) { - log.warn("Can't read CID history file: $this", e) + log.warn("Can't read CID history file: $this", e.message) } } // rewrite the history content - this.setText(newHistory.toString()) + this.path.setText(newHistory.toString()) } - @EqualsAndHashCode(includes = 'runName,sessionId') - static class CidRecord { - Date timestamp - String runName - UUID sessionId - String runCid + private void updateResultsCid0(UUID id, String resultsCid) { + assert id + def newHistory = new StringBuilder() - CidRecord(UUID sessionId, String name = null) { - this.runName = name - this.sessionId = sessionId + this.path.readLines().each { line -> + try { + def current = line ? 
CidHistoryRecord.parse(line) : null + if (current.sessionId == id) { + log.trace("Updating record for $id in CID history file $this") + final newRecord = new CidHistoryRecord(current.timestamp, current.runName, current.sessionId, current.runCid, resultsCid) + newHistory << newRecord.toString() << '\n' + } else { + newHistory << line << '\n' + } + } + catch (IllegalArgumentException e) { + log.warn("Can't read CID history file: $this", e.message) + } } - protected CidRecord() {} + // rewrite the history content + this.path.setText(newHistory.toString()) + } - List toList() { - def line = new ArrayList(4) - line << (timestamp ? TIMESTAMP_FMT.format(timestamp) : '-') - line << (runName ?: '-') - line << (sessionId.toString()) - line << (runCid ?: '-') + /** + * Apply the given action by using a file lock + * + * @param action The closure implementing the action to be executed with a file lock + * @return The value returned by the action closure + */ + protected withFileLock(Closure action) { + + def rnd = new Random() + long ts = System.currentTimeMillis() + final parent = this.path.parent ?: Path.of('.').toAbsolutePath() + Files.createDirectories(parent) + def file = parent.resolve("${this.path.name}.lock".toString()) + FileChannel fos + try { + fos = FileChannel.open(file, StandardOpenOption.WRITE, StandardOpenOption.CREATE) + } catch (UnsupportedOperationException e){ + log.warn("File System Provider for ${this.path} do not support file locking. 
Continuing without lock...") + return action.call() } - - @Override - String toString() { - toList().join('\t') + if (!fos){ + throw new IllegalStateException("Can't create a file channel for ${this.path.toAbsolutePath()}") } + try { + Throwable error + FileLock lock = null - static CidRecord parse(String line) { - def cols = line.tokenize('\t') - if (cols.size() == 2) - return new CidRecord(UUID.fromString(cols[0])) - - if (cols.size() == 4) { - - return new CidRecord( - timestamp: TIMESTAMP_FMT.parse(cols[0]), - runName: cols[1], - sessionId: UUID.fromString(cols[2]), - runCid: cols[3] - ) + try { + while (true) { + lock = fos.tryLock() + if (lock) break + if (System.currentTimeMillis() - ts < 1_000) + sleep rnd.nextInt(75) + else { + error = new IllegalStateException("Can't lock file: ${this.path.toAbsolutePath()} -- Nextflow needs to run in a file system that supports file locks") + break + } + } + if (lock) { + return action.call() + } + } + catch (Exception e) { + return action.call() + } + finally { + if (lock?.isValid()) lock.release() } - throw new IllegalArgumentException("Not a valid history entry: `$line`") + if (error) throw error + } + finally { + fos.closeQuietly() + file.delete() } } - } \ No newline at end of file diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryLog.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryLog.groovy new file mode 100644 index 0000000000..bc541b7760 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryLog.groovy @@ -0,0 +1,65 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package nextflow.data.cid + +/** + * Interface to log workflow executions and their corresponding CIDs + * + * @author Jorge Ejarque + */ +interface CidHistoryLog { + /** + * Write a workflow execution CidHistoryLog record. + * + * @param name Workflow execution name. + * @param sessionId Workflow session ID. + * @param runCid Workflow run CID. + * @param resultsCid Workflow results CID. + */ + void write(String name, UUID sessionId, String runCid, String resultsCid) + + /** + * Updates the run CID for a given session ID. + * + * @param sessionId Workflow session ID. + * @param runCid Workflow run CID. + */ + void updateRunCid(UUID sessionId, String runCid) + + /** + * Updates the results CID for a given session ID. + * + * @param sessionId Workflow session ID. + * @param resultsCid Workflow results CID. + */ + void updateResultsCid(UUID sessionId, String resultsCid) + + /** + * Get the store records in the CidHistoryLog. + * + * @return List stored CIDHistoryRecords. + */ + List getRecords() + + /** + * Get the record for a given + * @param sessionId Workflow session ID. + * @return CIDHistoryRecord for the given ID. 
+ */ + CidHistoryRecord getRecord(UUID sessionId) + +} \ No newline at end of file diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryRecord.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryRecord.groovy new file mode 100644 index 0000000000..744b114e22 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryRecord.groovy @@ -0,0 +1,80 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package nextflow.data.cid + +import groovy.transform.CompileStatic +import groovy.transform.EqualsAndHashCode + +import java.text.DateFormat +import java.text.SimpleDateFormat + +/** + * Record of workflow executions and their corresponding CIDs + * + * @author Jorge Ejarque + */ +@CompileStatic +@EqualsAndHashCode(includes = 'runName,sessionId') +class CidHistoryRecord { + public static final DateFormat TIMESTAMP_FMT = new SimpleDateFormat('yyyy-MM-dd HH:mm:ss') + final Date timestamp + final String runName + final UUID sessionId + final String runCid + final String resultsCid + + CidHistoryRecord(Date timestamp, String name, UUID sessionId, String runCid, String resultsCid = null) { + this.timestamp = timestamp + this.runName = name + this.sessionId = sessionId + this.runCid = runCid + this.resultsCid = resultsCid + } + + CidHistoryRecord(UUID sessionId, String name = null) { + this.runName = name + this.sessionId = sessionId + } + + protected CidHistoryRecord() {} + + List toList() { + def line = new ArrayList(4) + line << (timestamp ? 
TIMESTAMP_FMT.format(timestamp) : '-') + line << (runName ?: '-') + line << (sessionId.toString()) + line << (runCid ?: '-') + line << (resultsCid ?: '-') + } + + @Override + String toString() { + toList().join('\t') + } + + static CidHistoryRecord parse(String line) { + def cols = line.tokenize('\t') + if (cols.size() == 2) + return new CidHistoryRecord(UUID.fromString(cols[0])) + + if (cols.size() == 5) { + return new CidHistoryRecord(TIMESTAMP_FMT.parse(cols[0]), cols[1], UUID.fromString(cols[2]), cols[3], cols[4]) + } + + throw new IllegalArgumentException("Not a valid history entry: `$line`") + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index bdeb1dd393..e7b6eeef82 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -18,6 +18,7 @@ package nextflow.data.cid import groovy.util.logging.Slf4j +import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.Parameter import nextflow.data.cid.model.WorkflowResults @@ -49,7 +50,7 @@ import nextflow.trace.TraceRecord import nextflow.util.CacheHelper import static nextflow.data.cid.fs.CidPath.CID_PROT -import static nextflow.data.cid.fs.CidPath.METADATA_FILE + /** * Observer to write the generated workflow metadata in a CID store. 
* @@ -72,7 +73,7 @@ class CidObserver implements TraceObserver { @Override void onFlowCreate(Session session) { - this.store.getHistoryFile().write(session.runName, session.uniqueId, '-') + this.store.getHistoryLog().write(session.runName, session.uniqueId, '-', '-') } @TestOnly @@ -85,7 +86,7 @@ class CidObserver implements TraceObserver { DataType.WorkflowResults, "$CID_PROT${executionHash}", new ArrayList()) - this.store.getHistoryFile().update(session.uniqueId, "${CID_PROT}${this.executionHash}") + this.store.getHistoryLog().updateRunCid(session.uniqueId, "${CID_PROT}${this.executionHash}") } @Override @@ -93,8 +94,8 @@ class CidObserver implements TraceObserver { if (this.workflowResults){ final content = JsonOutput.prettyPrint(JsonOutput.toJson(workflowResults)) final wfResultsHash = CacheHelper.hasher(content).hash().toString() - this.store.save("${wfResultsHash}/$METADATA_FILE", content) - this.store.getHistoryFile().update(session.uniqueId, "${CID_PROT}${wfResultsHash}") + this.store.save(wfResultsHash, content) + this.store.getHistoryLog().updateResultsCid(session.uniqueId, "${CID_PROT}${wfResultsHash}") } } @@ -102,13 +103,21 @@ class CidObserver implements TraceObserver { final normalizer = new PathNormalizer(session.workflowMetadata) final mainScript = new DataPath( normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()), - session.workflowMetadata.scriptId + new Checksum(session.workflowMetadata.scriptId, "nextflow", CacheHelper.HashMode.DEFAULT().toString()) ) List otherScripts = new LinkedList<>() for (Path p: ScriptMeta.allScriptNames().values()) { if (p && p != session.workflowMetadata.scriptFile) { - otherScripts.add(new DataPath(normalizer.normalizePath(p.normalize()), - CacheHelper.hasher(p.text).hash().toString())) + otherScripts.add( + new DataPath( + normalizer.normalizePath(p.normalize()), + new Checksum( + CacheHelper.hasher(p.text).hash().toString(), + "nextflow", + CacheHelper.HashMode.DEFAULT().toString() + ) + ) + ) } 
} final workflow = new Workflow( @@ -128,7 +137,7 @@ class CidObserver implements TraceObserver { final content = JsonOutput.prettyPrint(JsonOutput.toJson(value)) final executionHash = CacheHelper.hasher(content).hash().toString() - store.save("${executionHash}/$METADATA_FILE", content) + store.save(executionHash, content) return executionHash } @@ -172,25 +181,29 @@ class CidObserver implements TraceObserver { } protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { + final codeChecksum = new Checksum(CacheHelper.hasher(session.stubRun ? task.stubSource: task.source).hash().toString(), + "nextflow", CacheHelper.HashMode.DEFAULT().toString()) final value = new nextflow.data.cid.model.TaskRun( DataType.TaskRun, session.uniqueId.toString(), task.getName(), - CacheHelper.hasher(session.stubRun ? task.stubSource: task.source).hash().toString(), + codeChecksum, task.inputs ? manageInputs(task.inputs, normalizer): null, task.isContainerEnabled() ? task.getContainerFingerprint(): null, normalizer.normalizePath(task.getCondaEnv()), normalizer.normalizePath(task.getSpackEnv()), task.config?.getArchitecture()?.toString(), task.processor.getTaskGlobalVars(task), - task.processor.getTaskBinEntries(task.source).collect { Path p -> new DataPath(normalizer.normalizePath(p.normalize()), - CacheHelper.hasher(p).hash().toString() )} + task.processor.getTaskBinEntries(task.source).collect { Path p -> new DataPath( + normalizer.normalizePath(p.normalize()), + new Checksum(CacheHelper.hasher(p).hash().toString(), "nextflow", CacheHelper.HashMode.DEFAULT().toString()) ) + } ) // store in the underlying persistence - final key = "${task.hash}/$METADATA_FILE" + final key = task.hash.toString() store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) - return task.hash.toString() + return key } protected void storeTaskOutput(TaskRun task, Path path) { @@ -198,12 +211,13 @@ class CidObserver implements TraceObserver { final attrs = readAttributes(path) final rel = 
getTaskRelative(task, path) final cid = "${task.hash}/${rel}" - final key = "${cid}/$METADATA_FILE" - final hash = CacheHelper.hasher(path).hash().toString() + final key = cid.toString() + final checksum = new Checksum( CacheHelper.hasher(path).hash().toString(), + "nextflow", CacheHelper.HashMode.DEFAULT().toString() ) final value = new Output( DataType.TaskOutput, path.toUriString(), - hash, + checksum, "$CID_PROT$task.hash", attrs.size(), attrs.creationTime().toMillis(), @@ -252,21 +266,25 @@ class CidObserver implements TraceObserver { @Override void onFilePublish(Path destination, Path source){ try { - final hash = CacheHelper.hasher(destination).hash().toString() + final checksum = new Checksum( + CacheHelper.hasher(destination).hash().toString(), + "nextflow", + CacheHelper.HashMode.DEFAULT().toString() + ) final rel = getWorkflowRelative(destination) - final key = "$executionHash/${rel}/$METADATA_FILE" + final key = "$executionHash/${rel}" final sourceReference = getSourceReference(source) final attrs = readAttributes(destination) final value = new Output( DataType.WorkflowOutput, destination.toUriString(), - hash, + checksum, sourceReference, attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis()) store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) - workflowResults.outputs.add("${CID_PROT}${executionHash}/${rel}") + workflowResults.outputs.add("${CID_PROT}${key}") } catch (Throwable e) { log.warn("Exception storing CID output $destination for workflow ${executionHash}.", e) } @@ -288,20 +306,24 @@ class CidObserver implements TraceObserver { @Override void onFilePublish(Path destination){ try { - final hash = CacheHelper.hasher(destination).hash().toString() + final checksum = new Checksum( + CacheHelper.hasher(destination).hash().toString(), + "nextflow", + CacheHelper.HashMode.DEFAULT().toString() + ) final rel = getWorkflowRelative(destination) - final key = "$executionHash/${rel}/$METADATA_FILE" + final key = 
"$executionHash/${rel}" final attrs = readAttributes(destination) final value = new Output( DataType.WorkflowOutput, destination.toUriString(), - hash, + checksum, "${CID_PROT}${executionHash}".toString(), attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis()) store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) - workflowResults.outputs.add("${CID_PROT}${executionHash}/${rel}") + workflowResults.outputs.add("${CID_PROT}${key}") }catch (Throwable e) { log.warn("Exception storing CID output $destination for workflow ${executionHash}. ${e.getLocalizedMessage()}") } @@ -344,7 +366,10 @@ class CidObserver implements TraceObserver { final paths = new LinkedList(); for( FileHolder it : files ) { final ref = getSourceReference(it.storePath) - paths.add(ref ? ref : new DataPath(normalizer.normalizePath(it.storePath), CacheHelper.hasher(it.storePath).hash().toString())) + paths.add(ref ? new DataPath(ref) : new DataPath( + normalizer.normalizePath(it.storePath), + new Checksum(CacheHelper.hasher(it.storePath).hash().toString(), "nextflow", CacheHelper.HashMode.DEFAULT().toString())) + ) } return paths } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy index f012c8f130..b22c8b4682 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy @@ -22,23 +22,44 @@ import java.util.function.Consumer import groovy.transform.CompileStatic import nextflow.data.config.DataConfig + /** - * + * Interface for the CID store * @author Paolo Di Tommaso */ @CompileStatic interface CidStore { + /** + * Open the CID store. + * @param config Configuration to open the CID store. + */ void open(DataConfig config) + /** + * Save a CID entry in the store for in a given key. + * @param key Entry key. + * @param value Entry object. 
+ */ void save(String key, Object value) - void list(String key, Consumer consumer) - + /** + * Load an entry for a given CID key. + * @param key CID key. + * @return entry value, or null if key does not exists + */ Object load(String key) + /** + * Get the CID store location path. + * @return CID store location path. + */ Path getPath() - CidHistoryFile getHistoryFile() + /** + * Get the {@link CidHistoryLog} object associated to the CidStore. + * @return {@link CidHistoryLog} object + */ + CidHistoryLog getHistoryLog() } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy index 1592f36928..097aecc4e4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy @@ -40,7 +40,7 @@ abstract class CidStoreFactory implements ExtensionPoint { protected abstract CidStore newInstance(DataConfig config) - private static CidStore create(DataConfig config){ + static CidStore create(DataConfig config){ final all = Plugins.getPriorityExtensions(CidStoreFactory) if( !all ) throw new IllegalStateException("Unable to find Nextflow CID store factory") diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy index 65096cb5de..b0d86cd394 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy @@ -17,9 +17,10 @@ package nextflow.data.cid +import nextflow.util.TestOnly + import java.nio.file.Files import java.nio.file.Path -import java.util.function.Consumer import groovy.transform.CompileStatic import groovy.util.logging.Slf4j @@ -36,46 +37,46 @@ import nextflow.exception.AbortOperationException class DefaultCidStore implements CidStore { private static 
String HISTORY_FILE_NAME =".history" + private static final String METADATA_FILE = '.data.json' + private static final String METADATA_PATH = '.meta' private Path metaLocation private Path location + private CidHistoryLog historyLog void open(DataConfig config) { location = config.store.location - metaLocation = getMetadataPath(config) + metaLocation = config.store.location.resolve(METADATA_PATH) if( !Files.exists(metaLocation) && !Files.createDirectories(metaLocation) ) { throw new AbortOperationException("Unable to create CID store directory: $metaLocation") } + historyLog = new CidHistoryFile(config.store.logLocation ?: metaLocation.resolve(HISTORY_FILE_NAME)) } @Override void save(String key, Object value) { - final path = metaLocation.resolve(key) + final path = metaLocation.resolve("$key/$METADATA_FILE") Files.createDirectories(path.parent) log.debug "Save CID file path: $path" path.text = value } - @Override - void list(String key, Consumer consumer) { - for( Path it : Files.walk(metaLocation.resolve(key)) ) { - final fileKey = metaLocation.relativize(it).toString() - consumer.accept(fileKey) - } - } - @Override Object load(String key) { - metaLocation.resolve(key).text + final path = metaLocation.resolve("$key/$METADATA_FILE") + log.debug("Loading from path $path") + if (path.exists()) + return path.text + log.debug("File for key $key not found") + return null } @Override Path getPath(){ location } - @Override - CidHistoryFile getHistoryFile(){ - return new CidHistoryFile(metaLocation.resolve(HISTORY_FILE_NAME)) - } + @TestOnly + Path getMetadataPath() {metaLocation} - static Path getMetadataPath(DataConfig config){ config.store.location.resolve('.meta') } + @Override + CidHistoryLog getHistoryLog(){ historyLog } } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy index 683c912b25..0f3f39aba4 100644 --- 
a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy @@ -17,7 +17,8 @@ package nextflow.data.cid.fs -import nextflow.data.cid.DefaultCidStore +import nextflow.data.cid.CidStore +import nextflow.data.cid.CidStoreFactory import java.nio.file.FileStore import java.nio.file.FileSystem @@ -40,7 +41,7 @@ class CidFileSystem extends FileSystem { private CidFileSystemProvider provider - private Path basePath + private CidStore cidStore /* * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 @@ -49,23 +50,23 @@ class CidFileSystem extends FileSystem { CidFileSystem(CidFileSystemProvider provider, DataConfig config) { this.provider = provider - this.basePath = DefaultCidStore.getMetadataPath(config) + this.cidStore = CidStoreFactory.create(config) } - Path getBasePath() { - return basePath + CidStore getCidStore() { + return cidStore } @Override boolean equals( Object other ) { if( this.class != other.class ) return false final that = (CidFileSystem)other - this.provider == that.provider && this.basePath == that.basePath + this.provider == that.provider && this.cidStore == that.cidStore } @Override int hashCode() { - Objects.hash(provider,basePath) + Objects.hash(provider,cidStore) } @Override diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy index 2f27d2e20b..4a255bc6f8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -208,20 +208,19 @@ class CidFileSystemProvider extends FileSystemProvider { @Override boolean accept(Path entry) throws IOException { - if( entry.startsWith(fs.getBasePath()) && entry.getFileName().toString() == CidPath.METADATA_FILE ) { - 
return false - } return true } } private static CidPath fromRealToCidPath(Path toConvert, Path realBase, CidPath cidBase){ - final fs = cidBase.fileSystem as CidFileSystem - if (toConvert.startsWith(fs.basePath)) { - return new CidPath(fs, toConvert) - } else { + if (toConvert.isAbsolute()) { + if (toConvert.class != realBase.class){ + throw new ProviderMismatchException() + } final relative = realBase.relativize(toConvert) return (CidPath) cidBase.resolve(relative.toString()) + } else { + return (CidPath) cidBase.resolve(toConvert.toString()) } } diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy index 2bb6e1c086..279abfaf12 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy @@ -46,16 +46,12 @@ import nextflow.file.FileHelper class CidPath implements Path { static public final String SEPARATOR = '/' - public static final String METADATA_FILE = '.data.json' public static final String CID_PROT = "${SCHEME}://" static private final String[] EMPTY = new String[] {} private CidFileSystem fileSystem - // Path of the file in the metadata cid store - private Path storePath - // String with the cid file path private String filePath @@ -64,20 +60,13 @@ class CidPath implements Path { */ protected CidPath(){} - protected CidPath(CidFileSystem fs, Path target) { - this.fileSystem = fs - this.storePath = target - this.filePath = filePath0(fs, target) - } - CidPath(CidFileSystem fs, String path) { this(fs, path, EMPTY) } CidPath(CidFileSystem fs, String path, String[] more) { this.fileSystem = fs - this.storePath = resolve0(fs, norm0(path), norm0(more)) - this.filePath = filePath0(fs, storePath) + this.filePath = resolve0(fs, norm0(path), norm0(more)) } private static void validateHash(Map cidObject) { @@ -92,21 +81,22 @@ class CidPath implements Path { @TestOnly protected 
String getFilePath(){ this.filePath } - @TestOnly - protected Path getStorePath(){ this.storePath } - /** * Finds the target path of a CID path **/ - protected static Path findTarget(Path cidStorePath, CidFileSystem fs, String[] childs=[]){ - assert fs - if( fs.basePath == cidStorePath ) - return null - final metadata = cidStorePath.resolve(METADATA_FILE).toFile() - if ( metadata.exists() ){ - final slurper = new JsonSlurper() - final cidObject = slurper.parse(metadata.text.toCharArray()) as Map + protected static Path findTarget(CidFileSystem fs, String filePath, String[] childs=[]) throws Exception{ + if( !fs ) + throw new IllegalArgumentException("Cannot get target path for a relative CidPath") + if( filePath.isEmpty() || filePath == SEPARATOR ) + throw new IllegalArgumentException("Cannot get target path for an empty CidPath") + final store = fs.getCidStore() + if( !store ) + throw new Exception("CID store not found. Check Nextflow configuration.") + final slurper = new JsonSlurper() + final object = store.load(filePath) + if ( object ){ + final cidObject = slurper.parse(object.toString().toCharArray()) as Map final type = DataType.valueOf(cidObject.type as String) if( type == DataType.TaskOutput || type == DataType.WorkflowOutput ) { // return the real path stored in the metadata @@ -115,32 +105,29 @@ class CidPath implements Path { if (childs && childs.size() > 0) realPath = realPath.resolve(childs.join(SEPARATOR)) if( !realPath.exists() ) - throw new FileNotFoundException("Target path $realPath for $cidStorePath does not exists.") + throw new FileNotFoundException("Target path $realPath for $filePath does not exists.") return realPath } } else { // If there isn't metadata check the parent to check if it is a subfolder of a task/workflow output - final parent = cidStorePath.getParent() + final currentPath = Path.of(filePath) + final parent = Path.of(filePath).getParent() if( parent) { ArrayList newChilds = new ArrayList() - 
newChilds.add(cidStorePath.getFileName().toString()) + newChilds.add(currentPath.getFileName().toString()) newChilds.addAll(childs) - return findTarget(parent, fs, newChilds as String[]) + return findTarget(fs, parent.toString(), newChilds as String[]) } } - return null + throw new FileNotFoundException("Target path $filePath does not exists.") } - private static String filePath0(CidFileSystem fs, Path target) { - if( !fs ) - return target.toString() - return fs.basePath != target - ? fs.basePath.relativize(target).toString() - : SEPARATOR + private static boolean isEmptyBase(CidFileSystem fs, String base){ + return !base || base == SEPARATOR || (fs && base == "..") } - private static Path resolve0(CidFileSystem fs, String base, String[] more) { - if( !base || base == SEPARATOR ) { + private static String resolve0(CidFileSystem fs, String base, String[] more) { + if( isEmptyBase(fs,base) ) { return resolveEmptyPathCase(fs, more as List) } if( base.contains(SEPARATOR) ) { @@ -148,16 +135,14 @@ class CidPath implements Path { final remain = parts[1..-1] + more.toList() return resolve0(fs, parts[0], remain as String[]) } - final result = fs ? fs.basePath.resolve(base) : Path.of(base) - return more - ? result.resolve(more.join(SEPARATOR)) - : result + def result = Path.of(base) + return more ? result.resolve(more.join(SEPARATOR)).toString() : result.toString() } - private static Path resolveEmptyPathCase(CidFileSystem fs, List more ){ + private static String resolveEmptyPathCase(CidFileSystem fs, List more ){ switch(more.size()) { case 0: - return fs ? 
fs.basePath : Path.of("/") + return "/" case 1: return resolve0(fs, more[0], EMPTY) default: @@ -166,10 +151,8 @@ class CidPath implements Path { } static private String norm0(String path) { - if( !path ) + if( !path || path==SEPARATOR) return "" - if( path==SEPARATOR ) - return path //Remove repeated elements path = Path.of(path).normalize().toString() //Remove initial and final separators @@ -204,7 +187,7 @@ class CidPath implements Path { @Override Path getFileName() { - final result = storePath?.getFileName()?.toString() + final result = Path.of(filePath).getFileName()?.toString() return result ? new CidPath(null, result) : null } @@ -220,28 +203,28 @@ class CidPath implements Path { @Override int getNameCount() { - return fileSystem ? storePath.nameCount-fileSystem.basePath.nameCount : storePath.nameCount + return Path.of(filePath).nameCount } @Override Path getName(int index) { if( index<0 ) throw new IllegalArgumentException("Path name index cannot be less than zero - offending value: $index") - final c= fileSystem.basePath.nameCount - return new CidPath(index==0 ? fileSystem : null, storePath.getName(c + index).toString()) + final path = Path.of(filePath) + return new CidPath(index==0 ? fileSystem : null, path.getName(index).toString()) } @Override Path subpath(int beginIndex, int endIndex) { if( beginIndex<0 ) throw new IllegalArgumentException("subpath begin index cannot be less than zero - offending value: $beginIndex") - final c= fileSystem.basePath.nameCount - return new CidPath(beginIndex==0 ? fileSystem : null, storePath.subpath(c+beginIndex, c+endIndex).toString()) + final path = Path.of(filePath) + return new CidPath(beginIndex==0 ? 
fileSystem : null, path.subpath(beginIndex, endIndex).toString()) } @Override Path normalize() { - return new CidPath(fileSystem, storePath.normalize()) + return new CidPath(fileSystem, Path.of(filePath).normalize().toString()) } @Override @@ -251,7 +234,7 @@ class CidPath implements Path { @Override boolean startsWith(String other) { - return storePath.startsWith(fileSystem.basePath.resolve(other)) + return filePath.startsWith(other) } @Override @@ -261,7 +244,7 @@ class CidPath implements Path { @Override boolean endsWith(String other) { - return storePath.endsWith(other) + return filePath.endsWith(other) } @Override @@ -275,12 +258,10 @@ class CidPath implements Path { return other if( that.isAbsolute() ) { return that + } else { + final newPath = Path.of(filePath).resolve(that.toString()) + return new CidPath(fileSystem, newPath.toString()) } - if( that.storePath ) { - final newPath = this.storePath.resolve(that.storePath) - return new CidPath(fileSystem, newPath) - } - return this } @Override @@ -304,7 +285,17 @@ class CidPath implements Path { if( CidPath.class != other.class ) { throw new ProviderMismatchException() } - final path = storePath.relativize(((CidPath) other).storePath) + CidPath cidOther = other as CidPath + if( this.isAbsolute() != cidOther.isAbsolute() ) + throw new IllegalArgumentException("Cannot compare absolute with relative paths"); + def path + if( this.isAbsolute() ) { + // Compare 'filePath' as absolute paths adding the root separator + path = Path.of(SEPARATOR + filePath).relativize(Path.of(SEPARATOR + cidOther.filePath)) + } else { + // Compare 'filePath' as relative paths + path = Path.of(filePath).relativize(Path.of(cidOther.filePath)) + } return new CidPath(null , path.getNameCount()>0 ? path.toString(): SEPARATOR) } @@ -324,12 +315,11 @@ class CidPath implements Path { @Override Path toRealPath(LinkOption... 
options) throws IOException { - return getTargetPath() + return this.getTargetPath() } protected Path getTargetPath(){ - final target = findTarget(storePath, fileSystem) - return target ? target : storePath + return findTarget(fileSystem, filePath) } @Override @@ -347,7 +337,7 @@ class CidPath implements Path { if( CidPath.class != other.class ) throw new ProviderMismatchException() final that = other as CidPath - return this.storePath.compareTo(that.storePath) + return Path.of(this.filePath).compareTo(Path.of(that.filePath)) } @Override @@ -356,7 +346,7 @@ class CidPath implements Path { return false } final that = (CidPath)other - return this.fileSystem == that.fileSystem && this.storePath.equals(that.storePath) + return this.fileSystem == that.fileSystem && this.filePath.equals(that.filePath) } /** @@ -364,7 +354,7 @@ class CidPath implements Path { */ @Override int hashCode() { - return Objects.hash(fileSystem,storePath) + return Objects.hash(fileSystem,filePath) } static URI asUri(String path) { diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Checksum.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Checksum.groovy new file mode 100644 index 0000000000..08a91de950 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Checksum.groovy @@ -0,0 +1,34 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * Models a checksum including the value as well as the algortihm and mode used to compute it. + * + * @author Jorge Ejarque inputs String container String conda diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy b/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy index f5873251ef..be6072a75c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy @@ -17,6 +17,8 @@ package nextflow.data.config +import nextflow.file.FileHelper + import java.nio.file.Path import groovy.transform.CompileStatic @@ -29,11 +31,13 @@ import groovy.transform.CompileStatic class DataStoreOpts { final Path location + final Path logLocation DataStoreOpts(Map opts) { this.location = opts.location - ? Path.of(opts.location as String) + ? FileHelper.toCanonicalPath(opts.location as String) : Path.of('.').toAbsolutePath().normalize().resolve('data') + this.logLocation = opts.logLocation ? FileHelper.toCanonicalPath(opts.logLocation as String) : null } } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index b0bf67aaca..38f0ca9e3b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -15,6 +15,7 @@ */ package nextflow.processor +import nextflow.data.cid.fs.CidPath import nextflow.trace.TraceRecord import static nextflow.processor.ErrorStrategy.* @@ -1939,6 +1940,9 @@ class TaskProcessor { if( item instanceof Path || coerceToPath ) { def path = normalizeToPath(item) + if (path instanceof CidPath){ + path = path.toRealPath() + } def target = executor.isForeignFile(path) ? 
batch.addToForeign(path) : path def holder = new FileHolder(target) files << holder diff --git a/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy b/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy index 8e3b8cb73c..15d5cb83ca 100644 --- a/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/util/HistoryFile.groovy @@ -33,7 +33,7 @@ import nextflow.exception.AbortOperationException * @author Paolo Di Tommaso */ @Slf4j -class HistoryFile extends WithLockFile { +class HistoryFile extends File { static String defaultFileName() { Const.appCacheDir.resolve('history').toString() } @@ -410,7 +410,52 @@ class HistoryFile extends WithLockFile { } } + /** + * Apply the given action by using a file lock + * + * @param action The closure implementing the action to be executed with a file lock + * @return The value returned by the action closure + */ + private withFileLock(Closure action) { + + def rnd = new Random() + long ts = System.currentTimeMillis() + String parent = this.parent ?: new File('.').absolutePath + def file = new File(parent, "${this.name}.lock".toString()) + def fos = new FileOutputStream(file) + try { + Throwable error + FileLock lock = null + try { + while( true ) { + lock = fos.getChannel().tryLock() + if( lock ) break + if( System.currentTimeMillis() - ts < 1_000 ) + sleep rnd.nextInt(75) + else { + error = new IllegalStateException("Can't lock file: ${this.absolutePath} -- Nextflow needs to run in a file system that supports file locks") + break + } + } + if( lock ) { + return action.call() + } + } + catch( Exception e ) { + return action.call() + } + finally { + if( lock?.isValid() ) lock.release() + } + + if( error ) throw error + } + finally { + fos.closeQuietly() + file.delete() + } + } Set findAllRunNames() { findAll().findResults{ it.runName } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy 
b/modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy deleted file mode 100644 index 20f6553bb6..0000000000 --- a/modules/nextflow/src/main/groovy/nextflow/util/WithLockFile.groovy +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2013-2024, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package nextflow.util - -import java.nio.channels.FileLock - -/** - * File with a file lock. - * - * @author Jorge Ejarque - */ -class WithLockFile extends File { - - WithLockFile(String filepath){ - super(filepath) - } - - /** - * Apply the given action by using a file lock - * - * @param action The closure implementing the action to be executed with a file lock - * @return The value returned by the action closure - */ - protected withFileLock(Closure action) { - - def rnd = new Random() - long ts = System.currentTimeMillis() - String parent = this.parent ?: new File('.').absolutePath - def file = new File(parent, "${this.name}.lock".toString()) - def fos = new FileOutputStream(file) - try { - Throwable error - FileLock lock = null - - try { - while( true ) { - lock = fos.getChannel().tryLock() - if( lock ) break - if( System.currentTimeMillis() - ts < 1_000 ) - sleep rnd.nextInt(75) - else { - error = new IllegalStateException("Can't lock file: ${this.absolutePath} -- Nextflow needs to run in a file system that supports file locks") - break - } - } - if( lock ) { - return action.call() - } - } - catch( Exception e ) { - return 
action.call() - } - finally { - if( lock?.isValid() ) lock.release() - } - - if( error ) throw error - } - finally { - fos.closeQuietly() - file.delete() - } - } -} diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy index d833a34e9c..b3f9f76250 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -17,11 +17,11 @@ package nextflow.cli import groovy.json.JsonOutput +import nextflow.data.cid.CidHistoryRecord import nextflow.data.cid.CidStoreFactory import java.nio.file.Files -import nextflow.data.cid.CidHistoryFile import nextflow.plugin.Plugins import org.junit.Rule @@ -63,7 +63,7 @@ class CmdCidTest extends Specification { def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } - def recordEntry = "${CidHistoryFile.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tcid://1234".toString() + def recordEntry = "${CidHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tcid://123456\tcid://456789".toString() historyFile.text = recordEntry when: def cidCmd = new CmdCid(launcher: launcher, args: ["log"]) @@ -79,8 +79,8 @@ class CmdCidTest extends Specification { stdout.size() == 2 stdout[1] == recordEntry - //cleanup: - //folder?.deleteDir() + cleanup: + folder?.deleteDir() } def 'should print no history' (){ @@ -101,6 +101,7 @@ class CmdCidTest extends Specification { .readLines()// remove the log part .findResults { line -> !line.contains('DEBUG') ? line : null } .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('WARN') ? line : null } .findResults { line -> !line.contains('plugin') ? line : null } then: @@ -170,7 +171,7 @@ class CmdCidTest extends Specification { then: stdout.size() == 1 - stdout[0] == "Error loading cid://12345." + stdout[0] == "No entry found for cid://12345." 
cleanup: folder?.deleteDir() diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy index 2b3412466d..ac9d3a627a 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy @@ -17,7 +17,6 @@ package nextflow.data.cid import spock.lang.Specification -import spock.lang.TempDir import java.nio.file.Files import java.nio.file.Path @@ -29,72 +28,85 @@ import java.nio.file.Path */ class CidHistoryFileTest extends Specification { - @TempDir Path tempDir - Path historyFile CidHistoryFile cidHistoryFile def setup() { + tempDir = Files.createTempDirectory("wdir") historyFile = tempDir.resolve("cid-history.txt") Files.createFile(historyFile) cidHistoryFile = new CidHistoryFile(historyFile) } + def cleanup(){ + tempDir?.deleteDir() + } + def "write should append a new record to the file"() { given: UUID sessionId = UUID.randomUUID() String runName = "TestRun" String runCid = "cid://123" + String resultsCid = "cid://456" when: - cidHistoryFile.write(runName, sessionId, runCid) + cidHistoryFile.write(runName, sessionId, runCid, resultsCid) then: def lines = Files.readAllLines(historyFile) lines.size() == 1 - def parsedRecord = CidHistoryFile.CidRecord.parse(lines[0]) + def parsedRecord = CidHistoryRecord.parse(lines[0]) parsedRecord.sessionId == sessionId parsedRecord.runName == runName parsedRecord.runCid == runCid + parsedRecord.resultsCid == resultsCid } - def "getRunCid should return correct runCid for existing session"() { + def "should return correct record for existing session"() { given: UUID sessionId = UUID.randomUUID() String runName = "Run1" String runCid = "cid://123" + String resultsCid = "cid://456" and: - cidHistoryFile.write(runName, sessionId, runCid) + cidHistoryFile.write(runName, sessionId, runCid, resultsCid) - expect: - 
cidHistoryFile.getRunCid(sessionId) == runCid + when: + def record = cidHistoryFile.getRecord(sessionId) + then: + record.sessionId == sessionId + record.runName == runName + record.runCid == runCid + record.resultsCid == resultsCid } - def "getRunCid should return null if session does not exist"() { + def "should return null if session does not exist"() { expect: - cidHistoryFile.getRunCid(UUID.randomUUID()) == null + cidHistoryFile.getRecord(UUID.randomUUID()) == null } - def "update should modify existing runCid for given session"() { + def "update should modify existing Cids for given session"() { given: UUID sessionId = UUID.randomUUID() String runName = "Run1" - String initialCid = "cid-abc" - String updatedCid = "cid-updated" + String runCidUpdated = "run-cid-updated" + String resultsCidUpdated = "results-cid-updated" and: - cidHistoryFile.write(runName, sessionId, initialCid) + cidHistoryFile.write(runName, sessionId, 'run-cid-initial', 'results-cid-inital') when: - cidHistoryFile.update(sessionId, updatedCid) + cidHistoryFile.updateRunCid(sessionId, runCidUpdated) + cidHistoryFile.updateResultsCid(sessionId, resultsCidUpdated) then: def lines = Files.readAllLines(historyFile) lines.size() == 1 - def parsedRecord = CidHistoryFile.CidRecord.parse(lines[0]) - parsedRecord.runCid == updatedCid + def parsedRecord = CidHistoryRecord.parse(lines[0]) + parsedRecord.runCid == runCidUpdated + parsedRecord.resultsCid == resultsCidUpdated } def "update should do nothing if session does not exist"() { @@ -103,56 +115,38 @@ class CidHistoryFileTest extends Specification { UUID nonExistingSessionId = UUID.randomUUID() String runName = "Run1" String runCid = "cid://123" - + String resultsCid = "cid://456" and: - cidHistoryFile.write(runName, existingSessionId, runCid) + cidHistoryFile.write(runName, existingSessionId, runCid, resultsCid) when: - cidHistoryFile.update(nonExistingSessionId, "new-cid") - + cidHistoryFile.updateRunCid(nonExistingSessionId, "new-cid") + 
cidHistoryFile.updateResultsCid(nonExistingSessionId, "new-res-cid") then: def lines = Files.readAllLines(historyFile) lines.size() == 1 - def parsedRecord = CidHistoryFile.CidRecord.parse(lines[0]) + def parsedRecord = CidHistoryRecord.parse(lines[0]) parsedRecord.runCid == runCid + parsedRecord.resultsCid == resultsCid } - def "CidRecord parse should throw for invalid record"() { - when: - CidHistoryFile.CidRecord.parse("invalid-record") - - then: - thrown(IllegalArgumentException) - } - - def "CidRecord parse should handle 4-column record"() { - given: - def timestamp = new Date() - def formattedTimestamp = CidHistoryFile.TIMESTAMP_FMT.format(timestamp) - def line = "${formattedTimestamp}\trun-1\t${UUID.randomUUID()}\tcid://123" - - when: - def record = CidHistoryFile.CidRecord.parse(line) - - then: - record.timestamp != null - record.runName == "run-1" - record.runCid == "cid://123" - } - - def "CidRecord toString should produce tab-separated format"() { + def 'should get records' () { given: UUID sessionId = UUID.randomUUID() - def record = new CidHistoryFile.CidRecord(sessionId, "TestRun") - record.timestamp = new Date() - record.runCid = "cid://123" + String runName = "Run1" + String runCid = "cid://123" + String resultsCid = "cid://456" + and: + cidHistoryFile.write(runName, sessionId, runCid, resultsCid) when: - def line = record.toString() - + def records = cidHistoryFile.getRecords() then: - line.contains("\t") - line.split("\t").size() == 4 + records.size() == 1 + records[0].sessionId == sessionId + records[0].runName == runName + records[0].runCid == runCid + records[0].resultsCid == resultsCid } } diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryRecordTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryRecordTest.groovy new file mode 100644 index 0000000000..150e6c8bee --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryRecordTest.groovy @@ -0,0 +1,63 @@ +/* + * Copyright
2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package nextflow.data.cid + +import spock.lang.Specification + +/** + * CID History Record tests + * + * @author Jorge Ejarque + */ +class CidHistoryRecordTest extends Specification { + def "CidRecord parse should throw for invalid record"() { + when: + CidHistoryRecord.parse("invalid-record") + + then: + thrown(IllegalArgumentException) + } + + def "CidRecord parse should handle 4-column record"() { + given: + def timestamp = new Date() + def formattedTimestamp = CidHistoryRecord.TIMESTAMP_FMT.format(timestamp) + def line = "${formattedTimestamp}\trun-1\t${UUID.randomUUID()}\tcid://123\tcid://456" + + when: + def record = CidHistoryRecord.parse(line) + + then: + record.timestamp != null + record.runName == "run-1" + record.runCid == "cid://123" + record.resultsCid == "cid://456" + } + + def "CidRecord toString should produce tab-separated format"() { + given: + UUID sessionId = UUID.randomUUID() + def record = new CidHistoryRecord(new Date(), "TestRun", sessionId, "cid://123", "cid://456") + + when: + def line = record.toString() + + then: + line.contains("\t") + line.split("\t").size() == 5 + } +} diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index 3d678638a7..3d927ec54a 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ 
b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -69,7 +69,8 @@ class CidObserverTest extends Specification { store.open(DataConfig.create(session)) def observer = new CidObserver(session, store) def expectedString = '{"type":"WorkflowRun","workflow":{"type": "Workflow",' + - '"mainScriptFile":{"path":"file://' + scriptFile.toString() + '", "checksum": "78910"},' + + '"mainScriptFile":{"path":"file://' + scriptFile.toString() + + '", "checksum": {"value": "78910", "algorithm": "nextflow", "mode": "STANDARD"}},' + '"otherScriptFiles": [], "repository": "https://nextflow.io/nf-test/",' + '"commitId": "123456" },' + '"sessionId": "' + uniqueId + '",' + @@ -118,7 +119,8 @@ class CidObserverTest extends Specification { } def expectedString = '{"type":"TaskRun",' + '"sessionId":"'+uniqueId.toString() + '",' + - '"name":"foo","code":"' + sourceHash + '",' + + '"name":"foo", "codeChecksum": {' + + '"value": "' + sourceHash + '", "algorithm": "nextflow", "mode": "STANDARD"},' + '"inputs": null,"container": null,"conda": null,' + '"spack": null,"architecture": null,' + '"globalVars": {},"binEntries": [],"annotations":null}' @@ -162,7 +164,8 @@ class CidObserverTest extends Specification { def attrs = Files.readAttributes(outFile, BasicFileAttributes) def expectedString = '{"type":"TaskOutput",' + '"path":"' + outFile.toString() + '",' + - '"checksum":"'+ fileHash + '",' + + '"checksum": { "value":"'+ fileHash + '",' + + '"algorithm": "nextflow", "mode": "STANDARD"},' + '"source":"cid://15cd5b07",' + '"size":'+attrs.size() + ',' + '"createdAt":' + attrs.creationTime().toMillis() + ',' + @@ -321,7 +324,7 @@ class CidObserverTest extends Specification { observer.onFlowCreate(session) observer.onFlowBegin() then: 'History file should contain execution hash' - def cid = store.getHistoryFile().getRunCid(uniqueId).substring(CID_PROT.size()) + def cid = store.getHistoryLog().getRecord(uniqueId).runCid.substring(CID_PROT.size()) cid == 
observer.executionHash when: ' publish output with source file' @@ -337,7 +340,8 @@ class CidObserverTest extends Specification { def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() def expectedString1 = '{"type":"WorkflowOutput",' + '"path":"' + outFile1.toString() + '",' + - '"checksum":"'+ fileHash1 + '",' + + '"checksum": {"value": "'+ fileHash1 + '",' + + '"algorithm": "nextflow", "mode": "STANDARD"},' + '"source":"cid://123987/file.bam",' + '"size":'+attrs1.size() + ',' + '"createdAt":' + attrs1.creationTime().toMillis() + ',' + @@ -355,7 +359,8 @@ class CidObserverTest extends Specification { then: 'Check outFile2 metadata in cid store' def expectedString2 = '{"type":"WorkflowOutput",' + '"path":"' + outFile2.toString() + '",' + - '"checksum":"'+ fileHash2 + '",' + + '"checksum": { "value": "'+ fileHash2 + '",' + + '"algorithm": "nextflow", "mode": "STANDARD"},' + '"source":"cid://' + observer.executionHash +'",' + '"size":'+attrs2.size() + ',' + '"createdAt":' + attrs2.creationTime().toMillis() + ',' + @@ -370,7 +375,7 @@ class CidObserverTest extends Specification { '"run":"cid://' + observer.executionHash +'",' + '"outputs": [ "cid://'+ observer.executionHash + '/foo/file.bam",' + '"cid://'+ observer.executionHash + '/foo/file2.bam" ]}' - def finalCid = store.getHistoryFile().getRunCid(uniqueId).substring(CID_PROT.size()) + def finalCid = store.getHistoryLog().getRecord(uniqueId).resultsCid.substring(CID_PROT.size()) finalCid != observer.executionHash folder.resolve(".meta/${finalCid}/.data.json").text == JsonOutput.prettyPrint(expectedString3) diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/DefaultCidStoreTest.groovy new file mode 100644 index 0000000000..aabd794096 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/DefaultCidStoreTest.groovy @@ -0,0 +1,97 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid + +import nextflow.data.config.DataConfig +import spock.lang.Specification +import spock.lang.TempDir + +import java.nio.file.Files +import java.nio.file.Path +import java.util.function.Consumer + +/** + * + * @author Jorge Ejarque + */ +class DefaultCidStoreTest extends Specification { + + @TempDir + Path tempDir + + Path storeLocation + Path metaLocation + DataConfig config + + def setup() { + storeLocation = tempDir.resolve("store") + metaLocation = storeLocation.resolve(".meta") + def configMap = [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]] + config = new DataConfig(configMap) + } + + def 'should open store'() { + given: + def cidStore = new DefaultCidStore() + when: + cidStore.open(config) + def historyLog = cidStore.getHistoryLog() + then: + cidStore.getPath() == storeLocation + cidStore.getMetadataPath() == metaLocation + historyLog != null + historyLog instanceof CidHistoryFile + } + + def "save should store value in the correct file location"() { + given: + def key = "testKey" + def value = "testValue" + def cidStore = new DefaultCidStore() + cidStore.open(config) + + when: + cidStore.save(key, value) + + then: + def filePath = metaLocation.resolve("$key/.data.json") + Files.exists(filePath) + filePath.text == value + } + + def "load should retrieve stored value correctly"() { + given: + def key = "testKey" + def value = 
"testValue" + def cidStore = new DefaultCidStore() + cidStore.open(config) + cidStore.save(key, value) + + expect: + cidStore.load(key) == value + } + + def "load should return null if key does not exist"() { + given: + def cidStore = new DefaultCidStore() + cidStore.open(config) + + expect: + cidStore.load("nonexistentKey") == null + } +} \ No newline at end of file diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy index 72979f580b..e5170b6005 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy @@ -74,18 +74,18 @@ class CidFileSystemProviderTest extends Specification { def 'should create new file system' () { given: def provider = new CidFileSystemProvider() - def config = [store:[location:'/data']] + def config = [store:[location:data.toString()]] def cid = CidPath.asUri('cid://12345') when: def fs = provider.newFileSystem(cid, config) as CidFileSystem then: - fs.basePath == Path.of('/data/.meta') + fs.cidStore.path == data } def 'should get a file system' () { given: def provider = new CidFileSystemProvider() - def config = [store:[location:'/data']] + def config = [store:[location: data.toString()]] def uri = CidPath.asUri('cid://12345') when: provider.getFileSystem(uri) @@ -97,12 +97,12 @@ class CidFileSystemProviderTest extends Specification { and: def result = provider.getFileSystem(uri) as CidFileSystem then: - result.basePath == Path.of('/data/.meta') + result.cidStore.path == data } def 'should get or create a file system' () { given: - def config = [workflow:[data:[store:[location:'/this/that']]]] + def config = [workflow:[data:[store:[location: data.toString()]]]] Global.session = Mock(Session) { getConfig()>>config } and: def uri = CidPath.asUri('cid://12345') @@ -111,7 +111,7 
@@ class CidFileSystemProviderTest extends Specification { when: def fs = provider.getFileSystemOrCreate(uri) as CidFileSystem then: - fs.basePath == Path.of('/this/that/.meta') + fs.cidStore.path == data when: def fs2 = provider.getFileSystemOrCreate(uri) as CidFileSystem @@ -119,26 +119,6 @@ class CidFileSystemProviderTest extends Specification { fs2.is(fs) } - def 'should get a path' () { - given: - def config = [workflow:[data:[store:[location:'/data']]]] - Global.session = Mock(Session) { getConfig()>>config } - and: - def provider = new CidFileSystemProvider() - def uri1 = CidPath.asUri('cid://12345') - def uri2 = CidPath.asUri('cid://12345/foo/bar') - - when: - def cid1 = provider.getPath(uri1) - then: - cid1.getTargetPath() == Path.of('/data/.meta/12345') - - when: - def cid2 = provider.getPath(uri2) - then: - cid2.getTargetPath() == Path.of('/data/.meta/12345/foo/bar') - } - def 'should create new byte channel' () { given: def config = [workflow:[data:[store:[location:wdir.toString()]]]] @@ -234,26 +214,25 @@ class CidFileSystemProviderTest extends Specification { Files.exists(cid.resolve('file3.txt')) when: - def stream = provider.newDirectoryStream(cid2, (p) -> true) - and: - def result = stream.toList() + provider.newDirectoryStream(cid2, (p) -> true) then: - result.toSet() == [ - cid2.resolve('output1'), - cid2.resolve('output2'), - ] as Set + thrown(FileNotFoundException) when: - def stream2 = provider.newDirectoryStream(cid, (p) -> true) + def stream = provider.newDirectoryStream(cid, (p) -> true) and: - def result2 = stream2.toList() + def result = stream.toList() then: - result2.toSet() == [ + result.toSet() == [ cid.resolve('file1.txt'), cid.resolve('file2.txt'), cid.resolve('file3.txt') ] as Set + cleanup: + meta.resolve('12345').deleteDir() + output1.deleteDir() + } def 'should not delete a file' () { @@ -325,12 +304,19 @@ class CidFileSystemProviderTest extends Specification { def 'should check is hidden file' () { given: def folder = 
Files.createTempDirectory('test') - def config = [workflow:[data:[store:[location:folder.toString()]]]] + def config = [workflow:[data:[store:[location:wdir.toString()]]]] Global.session = Mock(Session) { getConfig()>>config } and: + def output = folder.resolve('path') + output.mkdir() + output.resolve('abc').text = 'file1' + output.resolve('.foo').text = 'file2' + meta.resolve('12345/output').mkdirs() + meta.resolve('12345/output/.data.json').text = '{"type":"TaskOutput", "path": "' + output.toString() + '"}' + and: def provider = new CidFileSystemProvider() - def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) - def cid2 = provider.getPath(CidPath.asUri('cid://54321/.foo')) + def cid1 = provider.getPath(CidPath.asUri('cid://12345/output/abc')) + def cid2 = provider.getPath(CidPath.asUri('cid://12345/output/.foo')) expect: !provider.isHidden(cid1) diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy index fc2592d170..69663dd995 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy @@ -17,6 +17,9 @@ package nextflow.data.cid.fs +import nextflow.data.cid.DefaultCidStore +import nextflow.data.config.DataConfig + import java.nio.file.Files import java.nio.file.Path @@ -30,11 +33,10 @@ import spock.lang.Unroll */ class CidPathTest extends Specification { - @Shared def BASE = Path.of('/some/base/data') - @Shared def fs = Mock(CidFileSystem){ getBasePath() >> BASE } @Shared def wdir = Files.createTempDirectory('wdir') @Shared def cid = wdir.resolve('.meta') @Shared def data = wdir.resolve('work') + @Shared def fs = Mock(CidFileSystem) def cleanupSpec(){ wdir.deleteDir() @@ -44,56 +46,97 @@ class CidPathTest extends Specification { when: def cid = new CidPath(FS, PATH, MORE) then: - cid.storePath == EXPECTED_STORE cid.filePath == EXPECTED_FILE 
where: - FS | PATH | MORE | EXPECTED_STORE | EXPECTED_FILE - fs | '/' | [] as String[] | BASE | '/' - null | '/' | [] as String[] | Path.of('/') | '/' - fs | '/' | ['a','b'] as String[] | BASE.resolve('a/b') | 'a/b' - null | '/' | ['a','b'] as String[] | Path.of('a/b') | 'a/b' - fs | '' | [] as String[] | BASE | '/' - null | '' | [] as String[] | Path.of('/') | '/' - fs | '' | ['a','b'] as String[] | BASE.resolve('a/b') | 'a/b' - null | '' | ['a','b'] as String[] | Path.of('a/b') | 'a/b' - fs | '1234' | [] as String[] | BASE.resolve('1234') | '1234' - null | '1234' | [] as String[] | Path.of('1234') | '1234' - fs | '1234' | ['a','b'] as String[] | BASE.resolve('1234/a/b') | '1234/a/b' - null | '1234' | ['a','b'] as String[] | Path.of('1234/a/b') | '1234/a/b' - fs | '1234/c' | [] as String[] | BASE.resolve('1234/c') | '1234/c' - null | '1234/c' | [] as String[] | Path.of('1234/c') | '1234/c' - fs | '1234/c' | ['a','b'] as String[] | BASE.resolve('1234/c/a/b') | '1234/c/a/b' - null | '1234/c' | ['a','b'] as String[] | Path.of('1234/c/a/b') | '1234/c/a/b' - fs | '/1234/c' | [] as String[] | BASE.resolve('1234/c') | '1234/c' - null | '/1234/c' | [] as String[] | Path.of('1234/c') | '1234/c' - fs | '/1234/c' | ['a','b'] as String[] | BASE.resolve('1234/c/a/b') | '1234/c/a/b' - null | '/1234/c' | ['a','b'] as String[] | Path.of('1234/c/a/b') | '1234/c/a/b' + FS | PATH | MORE | EXPECTED_FILE + fs | '/' | [] as String[] | '/' + null | '/' | [] as String[] | '/' + fs | '/' | ['a','b'] as String[] | 'a/b' + null | '/' | ['a','b'] as String[] | 'a/b' + fs | '' | [] as String[] | '/' + null | '' | [] as String[] | '/' + fs | '' | ['a','b'] as String[] | 'a/b' + null | '' | ['a','b'] as String[] | 'a/b' + fs | '1234' | [] as String[] | '1234' + null | '1234' | [] as String[] | '1234' + fs | '1234' | ['a','b'] as String[] | '1234/a/b' + null | '1234' | ['a','b'] as String[] | '1234/a/b' + fs | '1234/c' | [] as String[] | '1234/c' + null | '1234/c' | [] as String[] | '1234/c' + 
fs | '1234/c' | ['a','b'] as String[] | '1234/c/a/b' + null | '1234/c' | ['a','b'] as String[] | '1234/c/a/b' + fs | '/1234/c' | [] as String[] | '1234/c' + null | '/1234/c' | [] as String[] | '1234/c' + fs | '/1234/c' | ['a','b'] as String[] | '1234/c/a/b' + null | '/1234/c' | ['a','b'] as String[] | '1234/c/a/b' + fs | '../c' | ['a','b'] as String[] | 'c/a/b' + null | '../c' | ['a','b'] as String[] | '../c/a/b' + fs | '../c' | [] as String[] | 'c' + null | '../c' | [] as String[] | '../c' + fs | '..' | [] as String[] | '/' + null | '..' | [] as String[] | '..' + fs | '/..' | [] as String[] | '/' + null | '/..' | [] as String[] | '/' + fs | './1234/c' | ['a','b'] as String[] | '1234/c/a/b' + null | './1234/c' | ['a','b'] as String[] | '1234/c/a/b' + fs | './1234/c' | [] as String[] | '1234/c' + null | './1234/c' | [] as String[] | '1234/c' + fs | '1234' | ['/'] as String[] | '1234' + null | '1234' | ['/'] as String[] | '1234' } def 'should get target path' () { given: - def output1 = data.resolve('output') - output1.resolve('some/path').mkdirs() - output1.resolve('some/path/file1.txt').text = "this is file1" - def output2 = data.resolve('file2.txt') - output2.text = "this is file2" - def cidFs = Mock(CidFileSystem){ getBasePath() >> cid } + def outputFolder = data.resolve('output') + def outputSubFolder = outputFolder.resolve('some/path') + outputSubFolder.mkdirs() + def outputSubFolderFile = outputSubFolder.resolve('file1.txt') + outputSubFolderFile.text = "this is file1" + def outputFile = data.resolve('file2.txt') + outputFile.text = "this is file2" + def store = new DefaultCidStore() + store.open(new DataConfig(enabled: true, store: [location: cid.parent.toString()])) + def cidFs = Mock(CidFileSystem){ getCidStore() >> store } cid.resolve('12345/output1').mkdirs() cid.resolve('12345/path/to/file2.txt').mkdirs() cid.resolve('12345/.data.json').text = '{"type":"TaskRun"}' - cid.resolve('12345/output1/.data.json').text = '{"type":"TaskOutput", "path": "' + 
output1.toString() + '"}' - cid.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"TaskOutput", "path": "' + output2.toString() + '"}' + cid.resolve('12345/output1/.data.json').text = '{"type":"TaskOutput", "path": "' + outputFolder.toString() + '"}' + cid.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"TaskOutput", "path": "' + outputFile.toString() + '"}' + + expect: 'Get real path when CidPath is the output data or a subfolder' + new CidPath(cidFs,'12345/output1' ).getTargetPath() == outputFolder + new CidPath(cidFs,'12345/output1/some/path' ).getTargetPath() == outputSubFolder + new CidPath(cidFs,'12345/output1/some/path/file1.txt').getTargetPath().text == outputSubFolderFile.text + new CidPath(cidFs, '12345/path/to/file2.txt').getTargetPath().text == outputFile.text + + when: 'CidPath fs is null' + new CidPath(null, '12345').getTargetPath() + then: + thrown(IllegalArgumentException) + + when: 'CidPath is empty' + new CidPath(cidFs, '/').getTargetPath() + then: + thrown(IllegalArgumentException) + + when: 'CidPath is not an output data description' + new CidPath(cidFs, '12345').getTargetPath() + then: + thrown(FileNotFoundException) + + when: 'CidPath is not subfolder of an output data description' + new CidPath(cidFs, '12345/path').getTargetPath() + then: + thrown(FileNotFoundException) + + when: 'Cid does not exist' + new CidPath(cidFs, '23456').getTargetPath() + then: + thrown(FileNotFoundException) + + cleanup: + cid.resolve('12345').deleteDir() - expect: - new CidPath(cidFs, PATH).getTargetPath() == EXPECTED - where: - PATH | EXPECTED - '/' | cid - '12345' | cid.resolve('12345') - '12345/output1' | data.resolve('output') - '12345/output1/some/path' | data.resolve('output/some/path') - '12345/path/to/' | cid.resolve('12345/path/to/') - '12345/path/to/file2.txt/' | data.resolve('file2.txt') } def 'should get file name' () { @@ -214,6 +257,37 @@ class CidPathTest extends Specification { '12345/a' | '/' } + def 'should relativize 
path' () { + expect: + BASE_PATH.relativize(PATH) == EXPECTED + where : + BASE_PATH | PATH | EXPECTED + new CidPath(fs, '/') | new CidPath(fs, '123/a/b/c') | new CidPath(null, '123/a/b/c') + new CidPath(fs,'123/a/') | new CidPath(fs, '123/a/b/c') | new CidPath(null, 'b/c') + new CidPath(fs,'123/a/') | new CidPath(fs, '321/a/') | new CidPath(null, '../../321/a') + new CidPath(null,'123/a') | new CidPath(null, '123/a/b/c') | new CidPath(null, 'b/c') + new CidPath(null,'123/a') | new CidPath(null, '321/a') | new CidPath(null, '../../321/a') + new CidPath(fs,'../a/') | new CidPath(fs, '321/a') | new CidPath(null, '../321/a') + new CidPath(fs,'321/a/') | new CidPath(fs, '../a') | new CidPath(null, '../../a') + new CidPath(null,'321/a/') | new CidPath(null, '../a') | new CidPath(null, '../../../a') + } + + def 'relativize should throw exception' () { + given: + def cid1 = new CidPath(fs,'123/a/') + def cid2 = new CidPath(null,'123/a/') + def cid3 = new CidPath(null, '../a/b') + when: 'comparing relative with absolute' + cid1.relativize(cid2) + then: + thrown(IllegalArgumentException) + + when: 'undefined base path' + cid3.relativize(cid2) + then: + thrown(IllegalArgumentException) + } + def 'should resolve path' () { when: def cid1 = new CidPath(fs, '123/a/b/c') @@ -236,7 +310,6 @@ class CidPathTest extends Specification { given: def pr = Mock(CidFileSystemProvider) def cidfs = Mock(CidFileSystem){ - getBasePath() >> BASE provider() >> pr} diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy index 800a60f637..24e5f44fee 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy @@ -17,6 +17,7 @@ package nextflow.data.cid.fs +import java.nio.file.Files import java.nio.file.Path import nextflow.Global @@ -31,12 +32,16 @@ import 
spock.lang.Unroll */ class CifPathFactoryTest extends Specification { + Path tmp + def setup() { - Global.session = Mock(Session) { getConfig()>> [workflow:[data:[store:[location: '/some/data']]]] } + tmp = Files.createTempDirectory("data") + Global.session = Mock(Session) { getConfig()>> [workflow:[data:[store:[location: tmp.toString()]]]] } } def cleanup() { Global.session = null + tmp.deleteDir() } def 'should create cid path' () { @@ -49,20 +54,17 @@ class CifPathFactoryTest extends Specification { when: def p1 = factory.parseUri('cid://12345') then: - p1.getTargetPath() == Path.of('/some/data/.meta/12345') p1.toUriString() == 'cid://12345' when: def p2 = factory.parseUri('cid://12345/x/y/z') then: - p2.getTargetPath() == Path.of('/some/data/.meta/12345/x/y/z') p2.toUriString() == 'cid://12345/x/y/z' when: def p3 = factory.parseUri('cid://12345//x///y/z//') then: - p3.getTargetPath() == Path.of('/some/data/.meta/12345/x/y/z') - p2.toUriString() == 'cid://12345/x/y/z' + p3.toUriString() == 'cid://12345/x/y/z' when: factory.parseUri('cid:///12345') diff --git a/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy index c604c5b86d..e75bcca7f3 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy @@ -31,6 +31,7 @@ class DataConfigTest extends Specification { def config = new DataConfig(Map.of()) then: config.store.location == Path.of('.').resolve('data').toAbsolutePath().normalize() + config.store.logLocation == null !config.enabled } @@ -39,14 +40,16 @@ class DataConfigTest extends Specification { def config = new DataConfig([enabled: true]) then: config.store.location == Path.of('.').resolve('data').toAbsolutePath().normalize() + config.store.logLocation == null config.enabled } - def 'should create data config' () { + def 'should create data config 
with location' () { when: - def config = new DataConfig(enabled: true, store: [location: "/some/data/store"]) + def config = new DataConfig(enabled: true, store: [location: "/some/data/store", logLocation: "/some/data/.history"]) then: config.store.location == Path.of("/some/data/store") + config.store.logLocation == Path.of("/some/data/.history") config.enabled } } From 3e2ca194a6b1c7fc57ccb7a56d85b99b333f938a Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Tue, 11 Mar 2025 21:52:10 +0100 Subject: [PATCH 24/72] Just blank [ci skip] Signed-off-by: Paolo Di Tommaso --- modules/nextflow/src/main/groovy/nextflow/Session.groovy | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 98c7017d73..09152bc301 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -439,6 +439,7 @@ class Session implements ISession { binding.setArgs( new ScriptRunner.ArgsList(args) ) cache = CacheFactory.create(uniqueId,runName).open() + return this } From 90c8e38cce8a37563495862988d2b69460c1b452 Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 12 Mar 2025 08:37:31 +0100 Subject: [PATCH 25/72] fix unexpected warning in cidpath hash validation Signed-off-by: jorgee --- .../nextflow/data/cid/CidObserver.groovy | 18 ++++++++++-------- .../groovy/nextflow/data/cid/fs/CidPath.groovy | 16 +++++++++++++--- .../nextflow/data/cid/CidObserverTest.groovy | 10 +++++----- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy index e7b6eeef82..9f694640aa 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy @@ -103,7 +103,7 @@ class CidObserver 
implements TraceObserver { final normalizer = new PathNormalizer(session.workflowMetadata) final mainScript = new DataPath( normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()), - new Checksum(session.workflowMetadata.scriptId, "nextflow", CacheHelper.HashMode.DEFAULT().toString()) + new Checksum(session.workflowMetadata.scriptId, "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) ) List otherScripts = new LinkedList<>() for (Path p: ScriptMeta.allScriptNames().values()) { @@ -114,7 +114,7 @@ class CidObserver implements TraceObserver { new Checksum( CacheHelper.hasher(p.text).hash().toString(), "nextflow", - CacheHelper.HashMode.DEFAULT().toString() + CacheHelper.HashMode.DEFAULT().toString().toLowerCase() ) ) ) @@ -182,7 +182,7 @@ class CidObserver implements TraceObserver { protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { final codeChecksum = new Checksum(CacheHelper.hasher(session.stubRun ? task.stubSource: task.source).hash().toString(), - "nextflow", CacheHelper.HashMode.DEFAULT().toString()) + "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) final value = new nextflow.data.cid.model.TaskRun( DataType.TaskRun, session.uniqueId.toString(), @@ -196,7 +196,8 @@ class CidObserver implements TraceObserver { task.processor.getTaskGlobalVars(task), task.processor.getTaskBinEntries(task.source).collect { Path p -> new DataPath( normalizer.normalizePath(p.normalize()), - new Checksum(CacheHelper.hasher(p).hash().toString(), "nextflow", CacheHelper.HashMode.DEFAULT().toString()) ) + new Checksum(CacheHelper.hasher(p).hash().toString(), "nextflow", + CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) ) } ) @@ -213,7 +214,7 @@ class CidObserver implements TraceObserver { final cid = "${task.hash}/${rel}" final key = cid.toString() final checksum = new Checksum( CacheHelper.hasher(path).hash().toString(), - "nextflow", CacheHelper.HashMode.DEFAULT().toString() ) + "nextflow", 
CacheHelper.HashMode.DEFAULT().toString().toLowerCase() ) final value = new Output( DataType.TaskOutput, path.toUriString(), @@ -269,7 +270,7 @@ class CidObserver implements TraceObserver { final checksum = new Checksum( CacheHelper.hasher(destination).hash().toString(), "nextflow", - CacheHelper.HashMode.DEFAULT().toString() + CacheHelper.HashMode.DEFAULT().toString().toLowerCase() ) final rel = getWorkflowRelative(destination) final key = "$executionHash/${rel}" @@ -309,7 +310,7 @@ class CidObserver implements TraceObserver { final checksum = new Checksum( CacheHelper.hasher(destination).hash().toString(), "nextflow", - CacheHelper.HashMode.DEFAULT().toString() + CacheHelper.HashMode.DEFAULT().toString().toLowerCase() ) final rel = getWorkflowRelative(destination) final key = "$executionHash/${rel}" @@ -368,7 +369,8 @@ class CidObserver implements TraceObserver { final ref = getSourceReference(it.storePath) paths.add(ref ? new DataPath(ref) : new DataPath( normalizer.normalizePath(it.storePath), - new Checksum(CacheHelper.hasher(it.storePath).hash().toString(), "nextflow", CacheHelper.HashMode.DEFAULT().toString())) + new Checksum(CacheHelper.hasher(it.storePath).hash().toString(), "nextflow", + CacheHelper.HashMode.DEFAULT().toString().toLowerCase())) ) } return paths diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy index 279abfaf12..555dc62855 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy @@ -44,7 +44,7 @@ import nextflow.file.FileHelper @Slf4j @CompileStatic class CidPath implements Path { - + static public final List SUPPORTED_CHECKSUM_ALGORITHMS=["nextflow"] static public final String SEPARATOR = '/' public static final String CID_PROT = "${SCHEME}://" @@ -73,8 +73,18 @@ class CidPath implements Path { final hashedPath = 
FileHelper.toCanonicalPath(cidObject.path as String) if( !hashedPath.exists() ) throw new FileNotFoundException("Target path $cidObject.path does not exists.") - if( cidObject.checksum && CacheHelper.hasher(hashedPath).hash().toString() != cidObject.checksum ) { - log.warn("Checksum of $hashedPath does not match with the one stored in the metadata") + if( cidObject.checksum ) { + final checksum = cidObject.checksum as Map + if( checksum.algorithm as String in SUPPORTED_CHECKSUM_ALGORITHMS ){ + + final hash = checksum.mode + ? CacheHelper.hasher(hashedPath,CacheHelper.HashMode.of(checksum.mode.toString().toLowerCase())).hash().toString() + : CacheHelper.hasher(hashedPath).hash().toString() + if( hash != checksum.value ) + log.warn("Checksum of $hashedPath does not match with the one stored in the metadata") + } else { + log.warn("Checksum of $hashedPath can not be validated. Algorithm ${checksum.algorithm} is not supported") + } } } diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy index 3d927ec54a..1e69e3d5b1 100644 --- a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy @@ -70,7 +70,7 @@ class CidObserverTest extends Specification { def observer = new CidObserver(session, store) def expectedString = '{"type":"WorkflowRun","workflow":{"type": "Workflow",' + '"mainScriptFile":{"path":"file://' + scriptFile.toString() + - '", "checksum": {"value": "78910", "algorithm": "nextflow", "mode": "STANDARD"}},' + + '", "checksum": {"value": "78910", "algorithm": "nextflow", "mode": "standard"}},' + '"otherScriptFiles": [], "repository": "https://nextflow.io/nf-test/",' + '"commitId": "123456" },' + '"sessionId": "' + uniqueId + '",' + @@ -120,7 +120,7 @@ class CidObserverTest extends Specification { def expectedString = '{"type":"TaskRun",' + 
'"sessionId":"'+uniqueId.toString() + '",' + '"name":"foo", "codeChecksum": {' + - '"value": "' + sourceHash + '", "algorithm": "nextflow", "mode": "STANDARD"},' + + '"value": "' + sourceHash + '", "algorithm": "nextflow", "mode": "standard"},' + '"inputs": null,"container": null,"conda": null,' + '"spack": null,"architecture": null,' + '"globalVars": {},"binEntries": [],"annotations":null}' @@ -165,7 +165,7 @@ class CidObserverTest extends Specification { def expectedString = '{"type":"TaskOutput",' + '"path":"' + outFile.toString() + '",' + '"checksum": { "value":"'+ fileHash + '",' + - '"algorithm": "nextflow", "mode": "STANDARD"},' + + '"algorithm": "nextflow", "mode": "standard"},' + '"source":"cid://15cd5b07",' + '"size":'+attrs.size() + ',' + '"createdAt":' + attrs.creationTime().toMillis() + ',' + @@ -341,7 +341,7 @@ class CidObserverTest extends Specification { def expectedString1 = '{"type":"WorkflowOutput",' + '"path":"' + outFile1.toString() + '",' + '"checksum": {"value": "'+ fileHash1 + '",' + - '"algorithm": "nextflow", "mode": "STANDARD"},' + + '"algorithm": "nextflow", "mode": "standard"},' + '"source":"cid://123987/file.bam",' + '"size":'+attrs1.size() + ',' + '"createdAt":' + attrs1.creationTime().toMillis() + ',' + @@ -360,7 +360,7 @@ class CidObserverTest extends Specification { def expectedString2 = '{"type":"WorkflowOutput",' + '"path":"' + outFile2.toString() + '",' + '"checksum": { "value": "'+ fileHash2 + '",' + - '"algorithm": "nextflow", "mode": "STANDARD"},' + + '"algorithm": "nextflow", "mode": "standard"},' + '"source":"cid://' + observer.executionHash +'",' + '"size":'+attrs2.size() + ',' + '"createdAt":' + attrs2.creationTime().toMillis() + ',' + From 527664517b81110b223880e583cac087253fb682 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 12 Mar 2025 15:14:39 +0100 Subject: [PATCH 26/72] Refactor CID store as plugin (#5877) Signed-off-by: Paolo Di Tommaso Signed-off-by: jorgee Co-authored-by: jorgee --- build.gradle | 6 
+- modules/nextflow/build.gradle | 2 +- .../main/groovy/nextflow/cli/CmdCid.groovy | 235 +++--------------- .../nextflow/dag/MermaidHtmlRenderer.groovy | 2 +- .../groovy/nextflow/file/RealPathAware.groovy | 37 +++ .../nextflow/processor/TaskProcessor.groovy | 7 +- .../trace/DefaultObserverFactory.groovy | 10 - .../resources/META-INF/build-info.properties | 6 +- .../main/resources/META-INF/extensions.idx | 2 - .../main/resources/META-INF/plugins-info.txt | 2 +- .../groovy/nextflow/cli/CmdCidTest.groovy | 8 +- modules/nf-cid/build.gradle | 39 +++ .../nextflow/data/cid/CidHistoryFile.groovy | 0 .../nextflow/data/cid/CidHistoryLog.groovy | 0 .../nextflow/data/cid/CidHistoryRecord.groovy | 0 .../nextflow/data/cid/CidObserver.groovy | 0 .../data/cid/CidObserverFactory.groovy | 41 +++ .../main}/nextflow/data/cid/CidStore.groovy | 3 +- .../nextflow/data/cid/CidStoreFactory.groovy | 0 .../nextflow/data/cid/DefaultCidStore.groovy | 0 .../data/cid/DefaultCidStoreFactory.groovy | 2 +- .../nextflow/data/cid/fs/CidFileSystem.groovy | 0 .../data/cid/fs/CidFileSystemProvider.groovy | 0 .../main}/nextflow/data/cid/fs/CidPath.groovy | 5 +- .../data/cid/fs/CidPathFactory.groovy | 0 .../nextflow/data/cid/model/Checksum.groovy | 0 .../nextflow/data/cid/model/DataPath.groovy | 0 .../nextflow/data/cid/model/DataType.groovy | 0 .../nextflow/data/cid/model/Output.groovy | 0 .../nextflow/data/cid/model/Parameter.groovy | 0 .../nextflow/data/cid/model/TaskRun.groovy | 0 .../nextflow/data/cid/model/Workflow.groovy | 0 .../data/cid/model/WorkflowResults.groovy | 0 .../data/cid/model/WorkflowRun.groovy | 0 .../cid/operation/CidOperationImpl.groovy | 217 ++++++++++++++++ .../nextflow/data/config/DataConfig.groovy | 0 .../nextflow/data/config/DataStoreOpts.groovy | 0 .../src/resources/META-INF/extensions.idx | 19 ++ .../java.nio.file.spi.FileSystemProvider | 0 .../services/nextflow.cli.CmdCid$CidOperation | 18 ++ .../data/cid/CidHistoryFileTest.groovy | 0 .../data/cid/CidHistoryRecordTest.groovy 
| 0 .../nextflow/data/cid/CidObserverTest.groovy | 0 .../data/cid/DefaultCidStoreTest.groovy | 0 .../cid/fs/CidFileSystemProviderTest.groovy | 0 .../nextflow/data/cid/fs/CidPathTest.groovy | 0 .../data/cid/fs/CifPathFactoryTest.groovy | 0 .../data/config/DataConfigTest.groovy | 0 packing.gradle | 3 +- settings.gradle | 1 + 50 files changed, 426 insertions(+), 239 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/file/RealPathAware.groovy create mode 100644 modules/nf-cid/build.gradle rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/CidHistoryFile.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/CidHistoryLog.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/CidHistoryRecord.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/CidObserver.groovy (100%) create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/CidStore.groovy (95%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/CidStoreFactory.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/DefaultCidStore.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/DefaultCidStoreFactory.groovy (94%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/fs/CidFileSystem.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/fs/CidFileSystemProvider.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/fs/CidPath.groovy (99%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/fs/CidPathFactory.groovy (100%) rename modules/{nextflow/src/main/groovy => 
nf-cid/src/main}/nextflow/data/cid/model/Checksum.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/model/DataPath.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/model/DataType.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/model/Output.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/model/Parameter.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/model/TaskRun.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/model/Workflow.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/model/WorkflowResults.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/cid/model/WorkflowRun.groovy (100%) create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/operation/CidOperationImpl.groovy rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/config/DataConfig.groovy (100%) rename modules/{nextflow/src/main/groovy => nf-cid/src/main}/nextflow/data/config/DataStoreOpts.groovy (100%) create mode 100644 modules/nf-cid/src/resources/META-INF/extensions.idx rename modules/{nextflow/src/main => nf-cid/src}/resources/META-INF/services/java.nio.file.spi.FileSystemProvider (100%) create mode 100644 modules/nf-cid/src/resources/META-INF/services/nextflow.cli.CmdCid$CidOperation rename modules/{nextflow/src/test/groovy => nf-cid/src/test}/nextflow/data/cid/CidHistoryFileTest.groovy (100%) rename modules/{nextflow/src/test/groovy => nf-cid/src/test}/nextflow/data/cid/CidHistoryRecordTest.groovy (100%) rename modules/{nextflow/src/test/groovy => nf-cid/src/test}/nextflow/data/cid/CidObserverTest.groovy (100%) rename modules/{nextflow/src/test/groovy => nf-cid/src/test}/nextflow/data/cid/DefaultCidStoreTest.groovy (100%) rename 
modules/{nextflow/src/test/groovy => nf-cid/src/test}/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy (100%) rename modules/{nextflow/src/test/groovy => nf-cid/src/test}/nextflow/data/cid/fs/CidPathTest.groovy (100%) rename modules/{nextflow/src/test/groovy => nf-cid/src/test}/nextflow/data/cid/fs/CifPathFactoryTest.groovy (100%) rename modules/{nextflow/src/test/groovy => nf-cid/src/test}/nextflow/data/config/DataConfigTest.groovy (100%) diff --git a/build.gradle b/build.gradle index a1aba2dd9a..0c8e11a11b 100644 --- a/build.gradle +++ b/build.gradle @@ -241,7 +241,7 @@ task compile { def getRuntimeConfigs() { def names = subprojects - .findAll { prj -> prj.name in ['nextflow','nf-commons','nf-httpfs'] } + .findAll { prj -> prj.name in ['nextflow','nf-cid','nf-commons','nf-httpfs'] } .collect { it.name } FileCollection result = null @@ -267,7 +267,7 @@ task exportClasspath { def home = System.getProperty('user.home') def all = getRuntimeConfigs() def libs = all.collect { File file -> /*println file.canonicalPath.replace(home, '$HOME');*/ file.canonicalPath; } - ['nextflow','nf-commons','nf-httpfs'].each {libs << file("modules/$it/build/libs/${it}-${version}.jar").canonicalPath } + ['nextflow','nf-cid','nf-commons','nf-httpfs'].each {libs << file("modules/$it/build/libs/${it}-${version}.jar").canonicalPath } file('.launch.classpath').text = libs.unique().join(':') } } @@ -280,7 +280,7 @@ ext.nexusEmail = project.findProperty('nexusEmail') // `signing.keyId` property needs to be defined in the `gradle.properties` file ext.enableSignArchives = project.findProperty('signing.keyId') -ext.coreProjects = projects( ':nextflow', ':nf-commons', ':nf-httpfs' ) +ext.coreProjects = projects( ':nextflow', ':nf-cid', ':nf-commons', ':nf-httpfs' ) configure(coreProjects) { group = 'io.nextflow' diff --git a/modules/nextflow/build.gradle b/modules/nextflow/build.gradle index cc1e28978b..7cebb1cb79 100644 --- a/modules/nextflow/build.gradle +++ b/modules/nextflow/build.gradle 
@@ -53,7 +53,7 @@ dependencies { api 'io.seqera:lib-trace:0.1.0' testImplementation 'org.subethamail:subethasmtp:3.1.7' - + testImplementation (project(':nf-cid')) // test configuration testFixturesApi ("org.apache.groovy:groovy-test:4.0.26") { exclude group: 'org.apache.groovy' } testFixturesApi ("org.objenesis:objenesis:3.4") diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index dc3f9e8abe..0b9f00b02d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,37 +17,31 @@ package nextflow.cli +import java.nio.file.Paths + import com.beust.jcommander.Parameter import com.beust.jcommander.Parameters -import groovy.json.JsonSlurper -import groovy.transform.Canonical import groovy.transform.CompileStatic -import nextflow.Session import nextflow.config.ConfigBuilder -import nextflow.dag.MermaidHtmlRenderer -import nextflow.data.cid.CidHistoryRecord -import nextflow.data.cid.CidStore -import nextflow.data.cid.CidStoreFactory -import nextflow.data.cid.model.DataType +import nextflow.config.ConfigMap import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins -import nextflow.ui.TableBuilder - -import java.nio.file.Path -import java.nio.file.Paths - -import static nextflow.data.cid.fs.CidPath.CID_PROT - /** * * @author Paolo Di Tommaso */ @CompileStatic -@Parameters(commandDescription = "Explore workflows CID metadata.") -class CmdCid extends CmdBase implements UsageAware{ +@Parameters(commandDescription = "Explore workflows CID metadata") +class CmdCid extends CmdBase implements UsageAware { private static final String NAME = 'cid' + interface 
CidOperation { + void log(ConfigMap config) + void show(ConfigMap config, List args) + void lineage(ConfigMap config, List args) + } + interface SubCmd { String getName() String getDescription() @@ -57,6 +51,10 @@ class CmdCid extends CmdBase implements UsageAware{ private List commands = new ArrayList<>() + private CidOperation operations + + private ConfigMap config + CmdCid() { commands << new CmdLog() commands << new CmdShow() @@ -78,7 +76,16 @@ class CmdCid extends CmdBase implements UsageAware{ } // setup the plugins system and load the secrets provider Plugins.init() - + // load the config + this.config = new ConfigBuilder() + .setOptions(launcher.options) + .setBaseDir(Paths.get('.')) + .build() + // load the command operations + this.operations = ServiceLoader.load(CidOperation.class).findFirst().orElse(null) + if( !operations ) + throw new IllegalStateException("Unable to load CID plugin") + // consume the first argument getCmd(args).apply(args.drop(1)) } @@ -150,35 +157,7 @@ class CmdCid extends CmdBase implements UsageAware{ usage() return } - final config = new ConfigBuilder() - .setOptions(getLauncher().getOptions()) - .setBaseDir(Paths.get('.')) - .build() - final session = new Session(config) - final store = CidStoreFactory.getOrCreate(session) - if (store) { - printHistory(store) - } else { - println "Error CID store not loaded. Check Nextflow configuration." 
- } - } - - private void printHistory(CidStore store) { - final records = store.historyLog?.records - if( records ) { - def table = new TableBuilder(cellSeparator: '\t') - .head('TIMESTAMP') - .head('RUN NAME') - .head('SESSION ID') - .head('RUN CID') - .head('RESULT CID') - for( CidHistoryRecord record: records ){ - table.append(record.toList()) - } - println table.toString() - } else { - println("No workflow runs CIDs found.") - } + operations.log(config) } @Override @@ -200,34 +179,14 @@ class CmdCid extends CmdBase implements UsageAware{ return 'Print the description of a CID reference' } - @Override void apply(List args) { if (args.size() != 1) { println("ERROR: Incorrect number of parameters") usage() return } - if (!args[0].startsWith(CID_PROT)) - throw new Exception("Identifier is not a CID URL") - final key = args[0].substring(CID_PROT.size()) - final config = new ConfigBuilder() - .setOptions(getLauncher().getOptions()) - .setBaseDir(Paths.get('.')) - .build() - final store = CidStoreFactory.getOrCreate(new Session(config)) - if (store) { - try { - final entry = store.load(key) - if( entry ) - println entry.toString() - else - println "No entry found for ${args[0]}." - } catch (Throwable e) { - println "Error loading ${args[0]}." - } - } else { - println "Error CID store not loaded. Check Nextflow configuration." 
- } + + operations.show(config, args) } @Override @@ -239,13 +198,6 @@ class CmdCid extends CmdBase implements UsageAware{ class CmdLineage implements SubCmd { - @Canonical - class Edge { - String source - String destination - String label - } - @Override String getName() { 'lineage' } @@ -254,141 +206,14 @@ class CmdCid extends CmdBase implements UsageAware{ return 'Render a lineage graph for a workflow output' } - @Override void apply(List args) { if (args.size() != 2) { println("ERROR: Incorrect number of parameters") usage() return } - try { - final config = new ConfigBuilder() - .setOptions(getLauncher().getOptions()) - .setBaseDir(Paths.get('.')) - .build() - final store = CidStoreFactory.getOrCreate(new Session(config)) - final template = readTemplate() - final network = getLineage(store, args[0]) - Path file = Path.of(args[1]) - file.text = template.replace('REPLACE_WITH_NETWORK_DATA', network) - println("Linage graph for ${args[0]} rendered in ${args[1]}") - } catch (Throwable e) { - println("ERROR: rendering lineage graph. 
${e.message}") - } - } - - private String getLineage(CidStore store, String dataCid) { - def lines = [] as List - lines << "flowchart BT".toString() - - final nodesToRender = new LinkedList() - nodesToRender.add(dataCid) - final edgesToRender = new LinkedList() - while (!nodesToRender.isEmpty()) { - final node = nodesToRender.removeFirst() - processNode(lines, node, nodesToRender, edgesToRender, store) - } - lines << "" - edgesToRender.each { lines << " ${it.source} -->${it.destination}".toString() } - lines << "" - return lines.join('\n') - } - - private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, CidStore store) { - if (!nodeToRender.startsWith(CID_PROT)) - throw new Exception("Identifier is not a CID URL") - final slurper = new JsonSlurper() - final key = nodeToRender.substring(CID_PROT.size()) - final cidObject = slurper.parse(store.load(key).toString().toCharArray()) as Map - switch (DataType.valueOf(cidObject.type as String)) { - case DataType.TaskOutput: - case DataType.WorkflowOutput: - lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); - final source = cidObject.source as String - if (source) { - if (source.startsWith(CID_PROT)) { - nodes.add(source) - edges.add(new Edge(source, nodeToRender)) - } else { - final label = convertToLabel(source) - lines << " ${source}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(source, nodeToRender)) - } - } - - break; - case DataType.WorkflowRun: - lines << "${nodeToRender}@{shape: processes, label: \"${cidObject.runName}\"}".toString() - final parameters = cidObject.params as List - parameters.each { - final label = convertToLabel(it.value.toString()) - lines << " ${it.value.toString()}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(it.value.toString(), nodeToRender)) - } - break; - case DataType.TaskRun: - lines << " ${nodeToRender}@{shape: process, label: \"${cidObject.name}\"}".toString() - 
final parameters = cidObject.inputs as List - for (nextflow.data.cid.model.Parameter source: parameters){ - if (source.type.equals(nextflow.script.params.FileInParam.simpleName)) { - manageFileInParam(lines, nodeToRender, nodes, edges, source.value) - } else { - final label = convertToLabel(source.value.toString()) - lines << " ${source.value.toString()}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(source.value.toString(), nodeToRender)) - } - } - break; - default: - throw new Exception("Unrecognized type reference ${cidObject.type}") - } - } - private String convertToLabel(String label){ - return label.replace('http', 'h\u200Ettp') - } - - private void manageFileInParam(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, value){ - if (value instanceof Collection) { - value.each { manageFileInParam(lines, nodeToRender, nodes, edges, it) } - return - } - if (value instanceof CharSequence) { - final source = value.toString() - if (source.startsWith(CID_PROT)) { - nodes.add(source) - edges.add(new Edge(source, nodeToRender)) - return - } - } - if (value instanceof Map) { - if (value.path) { - final path = value.path.toString() - if (path.startsWith(CID_PROT)) { - nodes.add(path) - edges.add(new Edge(path, nodeToRender)) - return - } else { - final label = convertToLabel(path) - lines << " ${path}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(path, nodeToRender)) - return - } - } - } - final label = convertToLabel(value.toString()) - lines << " ${value.toString()}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(value.toString(), nodeToRender)) - } - - protected static String readTemplate() { - final writer = new StringWriter() - final res = MermaidHtmlRenderer.class.getResourceAsStream('mermaid.dag.template.html') - int ch - while( (ch=res.read()) != -1 ) { - writer.append(ch as char) - } - writer.toString() + operations.lineage(config, args) } @Override diff --git 
a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidHtmlRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidHtmlRenderer.groovy index eb99b44e71..08aae0e3b3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidHtmlRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidHtmlRenderer.groovy @@ -33,7 +33,7 @@ class MermaidHtmlRenderer implements DagRenderer { file.text = template.replace('REPLACE_WITH_NETWORK_DATA', network) } - private String readTemplate() { + static String readTemplate() { final writer = new StringWriter() final res = MermaidHtmlRenderer.class.getResourceAsStream('mermaid.dag.template.html') int ch diff --git a/modules/nextflow/src/main/groovy/nextflow/file/RealPathAware.groovy b/modules/nextflow/src/main/groovy/nextflow/file/RealPathAware.groovy new file mode 100644 index 0000000000..e8d1903520 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/file/RealPathAware.groovy @@ -0,0 +1,37 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.file + +import java.nio.file.LinkOption +import java.nio.file.Path + +/** + * Marker interface for objects that represent a real path. + * + * This interface is used in the { @link nextflow.processor.TaskProcessor } when managing the foreign file staging. 
+ * + * @author Paolo Di Tommaso + */ +interface RealPathAware { + /** + * Returns the real path + * @param options + * @return + */ + Path toRealPath(LinkOption... options) +} diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 38f0ca9e3b..9c697cf664 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -15,9 +15,6 @@ */ package nextflow.processor -import nextflow.data.cid.fs.CidPath -import nextflow.trace.TraceRecord - import static nextflow.processor.ErrorStrategy.* import java.lang.reflect.InvocationTargetException @@ -81,6 +78,7 @@ import nextflow.file.FileHelper import nextflow.file.FileHolder import nextflow.file.FilePatternSplitter import nextflow.file.FilePorter +import nextflow.file.RealPathAware import nextflow.plugin.Plugins import nextflow.processor.tip.TaskTipProvider import nextflow.script.BaseScript @@ -107,6 +105,7 @@ import nextflow.script.params.TupleInParam import nextflow.script.params.TupleOutParam import nextflow.script.params.ValueInParam import nextflow.script.params.ValueOutParam +import nextflow.trace.TraceRecord import nextflow.util.ArrayBag import nextflow.util.BlankSeparatedList import nextflow.util.CacheHelper @@ -1940,7 +1939,7 @@ class TaskProcessor { if( item instanceof Path || coerceToPath ) { def path = normalizeToPath(item) - if (path instanceof CidPath){ + if (path instanceof RealPathAware){ path = path.toRealPath() } def target = executor.isForeignFile(path) ? 
batch.addToForeign(path) : path diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index ce1782ffc4..b2657170a8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -3,9 +3,6 @@ package nextflow.trace import java.nio.file.Path import nextflow.Session -import nextflow.data.cid.CidObserver -import nextflow.data.cid.CidStoreFactory - /** * Creates Nextflow observes object * @@ -27,16 +24,9 @@ class DefaultObserverFactory implements TraceObserverFactory { createTimelineObserver(result) createDagObserver(result) createAnsiLogObserver(result) - createCidObserver(result) return result } - protected void createCidObserver(Collection result) { - final store = CidStoreFactory.getOrCreate(session) - if( store ) - result.add( new CidObserver(this.session, store) ) - } - protected void createAnsiLogObserver(Collection result) { if( session.ansiLog ) { session.ansiLogObserver = new AnsiLogObserver() diff --git a/modules/nextflow/src/main/resources/META-INF/build-info.properties b/modules/nextflow/src/main/resources/META-INF/build-info.properties index 0a9fe20b9b..c86fece35f 100644 --- a/modules/nextflow/src/main/resources/META-INF/build-info.properties +++ b/modules/nextflow/src/main/resources/META-INF/build-info.properties @@ -1,4 +1,4 @@ -build=5931 +build=5932 version=25.01.0-edge -timestamp=1739357731332 -commitId=e0916a4b8 +timestamp=1741733332268 +commitId=3e2ca194a diff --git a/modules/nextflow/src/main/resources/META-INF/extensions.idx b/modules/nextflow/src/main/resources/META-INF/extensions.idx index e7ba19b1ab..35521344ea 100644 --- a/modules/nextflow/src/main/resources/META-INF/extensions.idx +++ b/modules/nextflow/src/main/resources/META-INF/extensions.idx @@ -25,5 +25,3 @@ nextflow.mail.SimpleMailProvider 
nextflow.mail.JavaMailProvider nextflow.processor.tip.DefaultTaskTipProvider nextflow.fusion.FusionTokenDefault -nextflow.data.cid.DefaultCidStoreFactory - diff --git a/modules/nextflow/src/main/resources/META-INF/plugins-info.txt b/modules/nextflow/src/main/resources/META-INF/plugins-info.txt index 0020a46a61..4fc2432da1 100644 --- a/modules/nextflow/src/main/resources/META-INF/plugins-info.txt +++ b/modules/nextflow/src/main/resources/META-INF/plugins-info.txt @@ -5,4 +5,4 @@ nf-codecommit@0.2.3 nf-console@1.1.5 nf-google@1.18.0 nf-tower@1.10.0 -nf-wave@1.10.0 \ No newline at end of file +nf-wave@1.10.0 diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy index b3f9f76250..5325761bf2 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -17,18 +17,18 @@ package nextflow.cli import groovy.json.JsonOutput -import nextflow.data.cid.CidHistoryRecord -import nextflow.data.cid.CidStoreFactory import java.nio.file.Files +import nextflow.dag.MermaidHtmlRenderer +import nextflow.data.cid.CidHistoryRecord +import nextflow.data.cid.CidStoreFactory import nextflow.plugin.Plugins import org.junit.Rule import spock.lang.Specification import test.OutputCapture - /** * CLI cid Tests * @@ -235,7 +235,7 @@ class CmdCidTest extends Specification { cid://45678/output.txt -->cid://123987 cid://45678 -->cid://45678/output.txt """ - final template = CmdCid.CmdLineage.readTemplate() + final template = MermaidHtmlRenderer.readTemplate() def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) when: diff --git a/modules/nf-cid/build.gradle b/modules/nf-cid/build.gradle new file mode 100644 index 0000000000..6a7edfabd6 --- /dev/null +++ b/modules/nf-cid/build.gradle @@ -0,0 +1,39 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +apply plugin: 'groovy' + +sourceSets { + main.java.srcDirs = [] + main.groovy.srcDirs = ['src/main'] + main.resources.srcDirs = ['src/resources'] + test.groovy.srcDirs = ['src/test'] + test.java.srcDirs = [] + test.resources.srcDirs = [] +} + +configurations { + // see https://docs.gradle.org/4.1/userguide/dependency_management.html#sub:exclude_transitive_dependencies + runtimeClasspath.exclude group: 'org.slf4j', module: 'slf4j-api' +} + +dependencies { + api project(':nextflow') + + testImplementation(testFixtures(project(":nextflow"))) + testImplementation "org.apache.groovy:groovy:4.0.26" + testImplementation "org.apache.groovy:groovy-nio:4.0.26" +} + diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryFile.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryLog.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryLog.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryRecord.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy 
similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/CidHistoryRecord.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/CidObserver.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy new file mode 100644 index 0000000000..1aa61444a3 --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy @@ -0,0 +1,41 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid + +import groovy.transform.CompileStatic +import nextflow.Session +import nextflow.trace.TraceObserver +import nextflow.trace.TraceObserverFactory + +/** + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class CidObserverFactory implements TraceObserverFactory { + + @Override + Collection create(Session session) { + final result = new ArrayList(1) + final store = CidStoreFactory.getOrCreate(session) + if( store ) + result.add( new CidObserver(session, store) ) + return result + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy similarity index 95% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy index b22c8b4682..dcbdad189d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,7 +18,6 @@ package nextflow.data.cid import java.nio.file.Path -import java.util.function.Consumer import groovy.transform.CompileStatic import nextflow.data.config.DataConfig diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/CidStoreFactory.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStore.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy similarity index 94% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy index c88fdb0459..e8b3b5c7af 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/DefaultCidStoreFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy @@ -27,7 +27,7 @@ import nextflow.plugin.Priority */ @CompileStatic @Priority(0) -class DefaultCidStoreFactory extends CidStoreFactory{ +class DefaultCidStoreFactory extends CidStoreFactory { @Override protected CidStore newInstance(DataConfig config) { diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystem.groovy rename to 
modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidFileSystemProvider.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy similarity index 99% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy index 555dc62855..f9370843f4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy @@ -20,6 +20,7 @@ package nextflow.data.cid.fs import groovy.json.JsonSlurper import groovy.util.logging.Slf4j import nextflow.data.cid.model.DataType +import nextflow.file.RealPathAware import nextflow.util.CacheHelper import nextflow.util.TestOnly @@ -43,7 +44,9 @@ import nextflow.file.FileHelper */ @Slf4j @CompileStatic -class CidPath implements Path { + +class CidPath implements Path, RealPathAware { + static public final List SUPPORTED_CHECKSUM_ALGORITHMS=["nextflow"] static public final String SEPARATOR = '/' public static final String CID_PROT = "${SCHEME}://" diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPathFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/fs/CidPathFactory.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Checksum.groovy 
b/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/Checksum.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataPath.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/DataType.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/DataType.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/DataType.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/Output.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Parameter.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Parameter.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/Parameter.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/Parameter.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/TaskRun.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy 
b/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/Workflow.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowResults.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowResults.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/cid/model/WorkflowRun.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy diff --git a/modules/nf-cid/src/main/nextflow/data/cid/operation/CidOperationImpl.groovy b/modules/nf-cid/src/main/nextflow/data/cid/operation/CidOperationImpl.groovy new file mode 100644 index 0000000000..1588ff917f --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/operation/CidOperationImpl.groovy @@ -0,0 +1,217 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.operation + +import static nextflow.data.cid.fs.CidPath.* + +import java.nio.file.Path + +import groovy.json.JsonSlurper +import groovy.transform.Canonical +import groovy.transform.CompileStatic +import nextflow.Session +import nextflow.cli.CmdCid +import nextflow.config.ConfigMap +import nextflow.dag.MermaidHtmlRenderer +import nextflow.data.cid.CidHistoryRecord +import nextflow.data.cid.CidStore +import nextflow.data.cid.CidStoreFactory +import nextflow.data.cid.model.DataType +import nextflow.ui.TableBuilder +/** + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class CidOperationImpl implements CmdCid.CidOperation { + + @Canonical + static class Edge { + String source + String destination + String label + } + + @Override + void log(ConfigMap config) { + final session = new Session(config) + final store = CidStoreFactory.getOrCreate(session) + if (store) { + printHistory(store) + } else { + println "Error CID store not loaded. Check Nextflow configuration." + } + } + + private void printHistory(CidStore store) { + final records = store.historyLog?.records + if( records ) { + def table = new TableBuilder(cellSeparator: '\t') + .head('TIMESTAMP') + .head('RUN NAME') + .head('SESSION ID') + .head('RUN CID') + .head('RESULT CID') + for( CidHistoryRecord record: records ){ + table.append(record.toList()) + } + println table.toString() + } else { + println("No workflow runs CIDs found.") + } + } + + @Override + void show(ConfigMap config, List args) { + if (!args[0].startsWith(CID_PROT)) + throw new Exception("Identifier is not a CID URL") + final key = args[0].substring(CID_PROT.size()) + final store = CidStoreFactory.getOrCreate(new Session(config)) + if (store) { + try { + final entry = store.load(key) + if( entry ) + println entry.toString() + else + println "No entry found for ${args[0]}." + } catch (Throwable e) { + println "Error loading ${args[0]}." + } + } else { + println "Error CID store not loaded. 
Check Nextflow configuration." + } + } + + @Override + void lineage(ConfigMap config, List args) { + try { + final store = CidStoreFactory.getOrCreate(new Session(config)) + final template = MermaidHtmlRenderer.readTemplate() + final network = getLineage(store, args[0]) + Path file = Path.of(args[1]) + file.text = template.replace('REPLACE_WITH_NETWORK_DATA', network) + println("Linage graph for ${args[0]} rendered in ${args[1]}") + } catch (Throwable e) { + println("ERROR: rendering lineage graph. ${e.message}") + } + } + + private String getLineage(CidStore store, String dataCid) { + def lines = [] as List + lines << "flowchart BT".toString() + + final nodesToRender = new LinkedList() + nodesToRender.add(dataCid) + final edgesToRender = new LinkedList() + while (!nodesToRender.isEmpty()) { + final node = nodesToRender.removeFirst() + processNode(lines, node, nodesToRender, edgesToRender, store) + } + lines << "" + edgesToRender.each { lines << " ${it.source} -->${it.destination}".toString() } + lines << "" + return lines.join('\n') + } + + private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, CidStore store) { + if (!nodeToRender.startsWith(CID_PROT)) + throw new Exception("Identifier is not a CID URL") + final slurper = new JsonSlurper() + final key = nodeToRender.substring(CID_PROT.size()) + final cidObject = slurper.parse(store.load(key).toString().toCharArray()) as Map + switch (DataType.valueOf(cidObject.type as String)) { + case DataType.TaskOutput: + case DataType.WorkflowOutput: + lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); + final source = cidObject.source as String + if (source) { + if (source.startsWith(CID_PROT)) { + nodes.add(source) + edges.add(new Edge(source, nodeToRender)) + } else { + final label = convertToLabel(source) + lines << " ${source}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(source, nodeToRender)) + } + } + + break; + case 
DataType.WorkflowRun: + lines << "${nodeToRender}@{shape: processes, label: \"${cidObject.runName}\"}".toString() + final parameters = cidObject.params as List + parameters.each { + final label = convertToLabel(it.value.toString()) + lines << " ${it.value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(it.value.toString(), nodeToRender)) + } + break; + case DataType.TaskRun: + lines << " ${nodeToRender}@{shape: process, label: \"${cidObject.name}\"}".toString() + final parameters = cidObject.inputs as List + for (nextflow.data.cid.model.Parameter source: parameters){ + if (source.type.equals(nextflow.script.params.FileInParam.simpleName)) { + manageFileInParam(lines, nodeToRender, nodes, edges, source.value) + } else { + final label = convertToLabel(source.value.toString()) + lines << " ${source.value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(source.value.toString(), nodeToRender)) + } + } + break; + default: + throw new Exception("Unrecognized type reference ${cidObject.type}") + } + } + + private String convertToLabel(String label){ + return label.replace('http', 'h\u200Ettp') + } + + private void manageFileInParam(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, value){ + if (value instanceof Collection) { + value.each { manageFileInParam(lines, nodeToRender, nodes, edges, it) } + return + } + if (value instanceof CharSequence) { + final source = value.toString() + if (source.startsWith(CID_PROT)) { + nodes.add(source) + edges.add(new Edge(source, nodeToRender)) + return + } + } + if (value instanceof Map) { + if (value.path) { + final path = value.path.toString() + if (path.startsWith(CID_PROT)) { + nodes.add(path) + edges.add(new Edge(path, nodeToRender)) + return + } else { + final label = convertToLabel(path) + lines << " ${path}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(path, nodeToRender)) + return + } + } + } + final label = 
convertToLabel(value.toString()) + lines << " ${value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(value.toString(), nodeToRender)) + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy b/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/config/DataConfig.groovy rename to modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy b/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy similarity index 100% rename from modules/nextflow/src/main/groovy/nextflow/data/config/DataStoreOpts.groovy rename to modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy diff --git a/modules/nf-cid/src/resources/META-INF/extensions.idx b/modules/nf-cid/src/resources/META-INF/extensions.idx new file mode 100644 index 0000000000..f2bf239837 --- /dev/null +++ b/modules/nf-cid/src/resources/META-INF/extensions.idx @@ -0,0 +1,19 @@ +# +# Copyright 2013-2025, Seqera Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +nextflow.data.cid.DefaultCidStoreFactory +nextflow.data.cid.CidObserverFactory + diff --git a/modules/nextflow/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider b/modules/nf-cid/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider similarity index 100% rename from modules/nextflow/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider rename to modules/nf-cid/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider diff --git a/modules/nf-cid/src/resources/META-INF/services/nextflow.cli.CmdCid$CidOperation b/modules/nf-cid/src/resources/META-INF/services/nextflow.cli.CmdCid$CidOperation new file mode 100644 index 0000000000..afee4231e0 --- /dev/null +++ b/modules/nf-cid/src/resources/META-INF/services/nextflow.cli.CmdCid$CidOperation @@ -0,0 +1,18 @@ +# +# Copyright 2013-2025, Seqera Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +nextflow.data.cid.operation.CidOperationImpl diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryFileTest.groovy similarity index 100% rename from modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryFileTest.groovy rename to modules/nf-cid/src/test/nextflow/data/cid/CidHistoryFileTest.groovy diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryRecordTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy similarity index 100% rename from modules/nextflow/src/test/groovy/nextflow/data/cid/CidHistoryRecordTest.groovy rename to modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy similarity index 100% rename from modules/nextflow/src/test/groovy/nextflow/data/cid/CidObserverTest.groovy rename to modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy similarity index 100% rename from modules/nextflow/src/test/groovy/nextflow/data/cid/DefaultCidStoreTest.groovy rename to modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy similarity index 100% rename from modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy rename to modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy 
similarity index 100% rename from modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CidPathTest.groovy rename to modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy diff --git a/modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CifPathFactoryTest.groovy similarity index 100% rename from modules/nextflow/src/test/groovy/nextflow/data/cid/fs/CifPathFactoryTest.groovy rename to modules/nf-cid/src/test/nextflow/data/cid/fs/CifPathFactoryTest.groovy diff --git a/modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy b/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy similarity index 100% rename from modules/nextflow/src/test/groovy/nextflow/data/config/DataConfigTest.groovy rename to modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy diff --git a/packing.gradle b/packing.gradle index 07e404c0d9..d1ba8bc07a 100644 --- a/packing.gradle +++ b/packing.gradle @@ -14,8 +14,9 @@ dependencies { api project(':nextflow') // include Ivy at runtime in order to have Grape @Grab work correctly defaultCfg "org.apache.ivy:ivy:2.5.2" - // default cfg = runtime + httpfs + amazon + tower client + wave client + // default cfg = runtime + httpfs + cid + amazon + tower client + wave client defaultCfg project(':nf-httpfs') + defaultCfg project(':nf-cid') console project(':plugins:nf-console') google project(':plugins:nf-google') amazon project(':plugins:nf-amazon') diff --git a/settings.gradle b/settings.gradle index c9900fd00f..cdc1a07de8 100644 --- a/settings.gradle +++ b/settings.gradle @@ -26,6 +26,7 @@ rootProject.name = 'nextflow-prj' include 'nextflow' include 'nf-commons' include 'nf-httpfs' +include 'nf-cid' rootProject.children.each { prj -> prj.projectDir = new File("$rootDir/modules/$prj.name") From 16f74b488d822bb83dce0328af678bb14fc6ae72 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sat, 15 Mar 2025 17:36:30 +0100 Subject: 
[PATCH 27/72] Restore unneeded changes [ci fast] Signed-off-by: Paolo Di Tommaso --- .../groovy/nextflow/trace/DefaultObserverFactory.groovy | 1 + .../src/main/resources/META-INF/build-info.properties | 6 +++--- modules/nextflow/src/main/resources/META-INF/extensions.idx | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy index b2657170a8..6c391625c9 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/DefaultObserverFactory.groovy @@ -3,6 +3,7 @@ package nextflow.trace import java.nio.file.Path import nextflow.Session + /** * Creates Nextflow observes object * diff --git a/modules/nextflow/src/main/resources/META-INF/build-info.properties b/modules/nextflow/src/main/resources/META-INF/build-info.properties index c86fece35f..0a9fe20b9b 100644 --- a/modules/nextflow/src/main/resources/META-INF/build-info.properties +++ b/modules/nextflow/src/main/resources/META-INF/build-info.properties @@ -1,4 +1,4 @@ -build=5932 +build=5931 version=25.01.0-edge -timestamp=1741733332268 -commitId=3e2ca194a +timestamp=1739357731332 +commitId=e0916a4b8 diff --git a/modules/nextflow/src/main/resources/META-INF/extensions.idx b/modules/nextflow/src/main/resources/META-INF/extensions.idx index 35521344ea..7fb037c37d 100644 --- a/modules/nextflow/src/main/resources/META-INF/extensions.idx +++ b/modules/nextflow/src/main/resources/META-INF/extensions.idx @@ -25,3 +25,4 @@ nextflow.mail.SimpleMailProvider nextflow.mail.JavaMailProvider nextflow.processor.tip.DefaultTaskTipProvider nextflow.fusion.FusionTokenDefault + From eb14d4bc4da91309a49cd5a6fa583e55e9e92531 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Thu, 20 Mar 2025 12:58:53 +0100 Subject: [PATCH 28/72] Add CID H2 plugin (#5889) Signed-off-by: Paolo Di Tommaso 
Signed-off-by: jorgee Co-authored-by: jorgee --- .../main/groovy/nextflow/cli/CmdCid.groovy | 19 ++- .../groovy/nextflow/cli/CmdCidTest.groovy | 10 +- .../main/nextflow/data/cid/CidStore.groovy | 12 +- .../nextflow/data/cid/CidStoreFactory.groovy | 9 +- .../nextflow/data/cid/DefaultCidStore.groovy | 32 +++- .../data/cid/DefaultCidStoreFactory.groovy | 19 ++- .../CidCommandImpl.groovy} | 5 +- .../nextflow/data/config/DataConfig.groovy | 1 - .../nextflow/data/config/DataStoreOpts.groovy | 12 +- .../src/resources/META-INF/extensions.idx | 2 +- .../cid/DefaultCidStoreFactoryTest.groovy | 50 ++++++ .../data/cid/DefaultCidStoreTest.groovy | 11 +- .../cid/fs/CidFileSystemProviderTest.groovy | 9 +- .../data/config/DataConfigTest.groovy | 12 +- .../main/nextflow/plugin/PluginsFacade.groovy | 8 +- plugins/nf-cid-h2/build.gradle | 49 ++++++ .../data/cid/h2/H2CidHistoryLog.groovy | 133 ++++++++++++++++ .../nextflow/data/cid/h2/H2CidPlugin.groovy | 35 +++++ .../nextflow/data/cid/h2/H2CidStore.groovy | 141 +++++++++++++++++ .../data/cid/h2/H2CidStoreFactory.groovy | 41 +++++ .../src/resources/META-INF/MANIFEST.MF | 6 + .../src/resources/META-INF/extensions.idx | 4 +- .../data/cid/h2/H2CidHistoryLogTest.groovy | 145 ++++++++++++++++++ .../data/cid/h2/H2CidStoreTest.groovy | 50 ++++++ settings.gradle | 1 + 25 files changed, 746 insertions(+), 70 deletions(-) rename modules/nf-cid/src/main/nextflow/data/cid/{operation/CidOperationImpl.groovy => cli/CidCommandImpl.groovy} (98%) create mode 100644 modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy create mode 100644 plugins/nf-cid-h2/build.gradle create mode 100644 plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy create mode 100644 plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidPlugin.groovy create mode 100644 plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy create mode 100644 plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStoreFactory.groovy create mode 
100644 plugins/nf-cid-h2/src/resources/META-INF/MANIFEST.MF rename modules/nf-cid/src/resources/META-INF/services/nextflow.cli.CmdCid$CidOperation => plugins/nf-cid-h2/src/resources/META-INF/extensions.idx (89%) create mode 100644 plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy create mode 100644 plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index 0b9f00b02d..677887c43c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -26,7 +26,10 @@ import nextflow.config.ConfigBuilder import nextflow.config.ConfigMap import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins +import org.pf4j.ExtensionPoint + /** + * CID command line interface * * @author Paolo Di Tommaso */ @@ -36,7 +39,7 @@ class CmdCid extends CmdBase implements UsageAware { private static final String NAME = 'cid' - interface CidOperation { + interface CidCommand extends ExtensionPoint { void log(ConfigMap config) void show(ConfigMap config, List args) void lineage(ConfigMap config, List args) @@ -51,7 +54,7 @@ class CmdCid extends CmdBase implements UsageAware { private List commands = new ArrayList<>() - private CidOperation operations + private CidCommand operation private ConfigMap config @@ -81,9 +84,11 @@ class CmdCid extends CmdBase implements UsageAware { .setOptions(launcher.options) .setBaseDir(Paths.get('.')) .build() + // init plugins + Plugins.load(config) // load the command operations - this.operations = ServiceLoader.load(CidOperation.class).findFirst().orElse(null) - if( !operations ) + this.operation = Plugins.getExtension(CidCommand) + if( !operation ) throw new IllegalStateException("Unable to load CID plugin") // consume the first argument getCmd(args).apply(args.drop(1)) @@ -157,7 +162,7 @@ 
class CmdCid extends CmdBase implements UsageAware { usage() return } - operations.log(config) + operation.log(config) } @Override @@ -186,7 +191,7 @@ class CmdCid extends CmdBase implements UsageAware { return } - operations.show(config, args) + operation.show(config, args) } @Override @@ -213,7 +218,7 @@ class CmdCid extends CmdBase implements UsageAware { return } - operations.lineage(config, args) + operation.lineage(config, args) } @Override diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy index 5325761bf2..f314af72c6 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
+ * */ package nextflow.cli @@ -20,6 +21,7 @@ import groovy.json.JsonOutput import java.nio.file.Files +import nextflow.SysEnv import nextflow.dag.MermaidHtmlRenderer import nextflow.data.cid.CidHistoryRecord import nextflow.data.cid.CidStoreFactory @@ -36,9 +38,15 @@ import test.OutputCapture */ class CmdCidTest extends Specification { + def setup() { + // clear the environment to avoid the local env pollute the test env + SysEnv.push([:]) + } + def cleanup() { Plugins.stop() CidStoreFactory.reset() + SysEnv.pop() } def setupSpec() { diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy index dcbdad189d..56103f95c1 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy @@ -17,23 +17,21 @@ package nextflow.data.cid -import java.nio.file.Path import groovy.transform.CompileStatic import nextflow.data.config.DataConfig - /** * Interface for the CID store * @author Paolo Di Tommaso */ @CompileStatic -interface CidStore { +interface CidStore extends Closeable { /** * Open the CID store. * @param config Configuration to open the CID store. */ - void open(DataConfig config) + CidStore open(DataConfig config) /** * Save a CID entry in the store for in a given key. @@ -49,12 +47,6 @@ interface CidStore { */ Object load(String key) - /** - * Get the CID store location path. - * @return CID store location path. - */ - Path getPath() - /** * Get the {@link CidHistoryLog} object associated to the CidStore. 
* @return {@link CidHistoryLog} object diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy index 097aecc4e4..e425cb5af1 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy @@ -38,13 +38,16 @@ abstract class CidStoreFactory implements ExtensionPoint { private static boolean initialized + protected abstract boolean canOpen(DataConfig config) + protected abstract CidStore newInstance(DataConfig config) static CidStore create(DataConfig config){ - final all = Plugins.getPriorityExtensions(CidStoreFactory) - if( !all ) + final factory = Plugins + .getPriorityExtensions(CidStoreFactory) + .find( f-> f.canOpen(config)) + if( !factory ) throw new IllegalStateException("Unable to find Nextflow CID store factory") - final factory = all.first() log.debug "Using Nextflow CID store factory: ${factory.getClass().getName()}" return factory.newInstance(config) } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy index b0d86cd394..13ed3c840f 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy @@ -17,6 +17,7 @@ package nextflow.data.cid +import nextflow.file.FileHelper import nextflow.util.TestOnly import java.nio.file.Files @@ -39,17 +40,25 @@ class DefaultCidStore implements CidStore { private static String HISTORY_FILE_NAME =".history" private static final String METADATA_FILE = '.data.json' private static final String METADATA_PATH = '.meta' + private Path metaLocation private Path location private CidHistoryLog historyLog - void open(DataConfig config) { - location = config.store.location - metaLocation = config.store.location.resolve(METADATA_PATH) + DefaultCidStore open(DataConfig config) { + location 
= toLocationPath(config.store.location) + metaLocation = location.resolve(METADATA_PATH) if( !Files.exists(metaLocation) && !Files.createDirectories(metaLocation) ) { throw new AbortOperationException("Unable to create CID store directory: $metaLocation") } - historyLog = new CidHistoryFile(config.store.logLocation ?: metaLocation.resolve(HISTORY_FILE_NAME)) + historyLog = new CidHistoryFile(metaLocation.resolve(HISTORY_FILE_NAME)) + return this + } + + protected Path toLocationPath(String location) { + return location + ? FileHelper.toCanonicalPath(location) + : Path.of('.').toAbsolutePath().normalize().resolve('data') } @Override @@ -70,13 +79,20 @@ class DefaultCidStore implements CidStore { return null } - @Override - Path getPath(){ location } + Path getLocation(){ + return location + } @TestOnly - Path getMetadataPath() {metaLocation} + Path getMetadataPath() { + return metaLocation + } @Override - CidHistoryLog getHistoryLog(){ historyLog } + CidHistoryLog getHistoryLog(){ + return historyLog + } + @Override + void close() { } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy index e8b3b5c7af..0a2d73d261 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy @@ -16,6 +16,8 @@ */ package nextflow.data.cid +import java.util.regex.Pattern + import groovy.transform.CompileStatic import nextflow.data.config.DataConfig import nextflow.plugin.Priority @@ -29,11 +31,22 @@ import nextflow.plugin.Priority @Priority(0) class DefaultCidStoreFactory extends CidStoreFactory { + private static Pattern SCHEME = ~/^([a-zA-Z][a-zA-Z\d+\-.]*):/ + private static List SUPPORTED_SCHEMES = ['file', 's3', 'gs', 'az'] + + @Override + boolean canOpen(DataConfig config) { + final loc = config.store.location + if( !loc ) { + return true + } + final matcher = 
SCHEME.matcher(loc) + return matcher.find() ? matcher.group(1) in SUPPORTED_SCHEMES : true + } + @Override protected CidStore newInstance(DataConfig config) { - final cidStore = new DefaultCidStore() - cidStore.open(config) - return cidStore + return new DefaultCidStore() .open(config) } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/operation/CidOperationImpl.groovy b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy similarity index 98% rename from modules/nf-cid/src/main/nextflow/data/cid/operation/CidOperationImpl.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy index 1588ff917f..293aa3789f 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/operation/CidOperationImpl.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy @@ -15,7 +15,7 @@ * */ -package nextflow.data.cid.operation +package nextflow.data.cid.cli import static nextflow.data.cid.fs.CidPath.* @@ -34,11 +34,12 @@ import nextflow.data.cid.CidStoreFactory import nextflow.data.cid.model.DataType import nextflow.ui.TableBuilder /** + * Implements CID command line operations * * @author Paolo Di Tommaso */ @CompileStatic -class CidOperationImpl implements CmdCid.CidOperation { +class CidCommandImpl implements CmdCid.CidCommand { @Canonical static class Edge { diff --git a/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy b/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy index 7b5f0687dd..1038ddfe18 100644 --- a/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy +++ b/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy @@ -20,7 +20,6 @@ package nextflow.data.config import groovy.transform.CompileStatic import nextflow.Global import nextflow.Session -import nextflow.util.TestOnly /** * Model workflow data config diff --git a/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy b/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy index 
be6072a75c..8a4ee10335 100644 --- a/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy +++ b/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy @@ -17,10 +17,6 @@ package nextflow.data.config -import nextflow.file.FileHelper - -import java.nio.file.Path - import groovy.transform.CompileStatic /** * Model data store options @@ -30,14 +26,10 @@ import groovy.transform.CompileStatic @CompileStatic class DataStoreOpts { - final Path location - final Path logLocation + final String location DataStoreOpts(Map opts) { - this.location = opts.location - ? FileHelper.toCanonicalPath(opts.location as String) - : Path.of('.').toAbsolutePath().normalize().resolve('data') - this.logLocation = opts.logLocation ? FileHelper.toCanonicalPath(opts.logLocation as String) : null + this.location = opts.location as String } } diff --git a/modules/nf-cid/src/resources/META-INF/extensions.idx b/modules/nf-cid/src/resources/META-INF/extensions.idx index f2bf239837..e205ab34be 100644 --- a/modules/nf-cid/src/resources/META-INF/extensions.idx +++ b/modules/nf-cid/src/resources/META-INF/extensions.idx @@ -16,4 +16,4 @@ nextflow.data.cid.DefaultCidStoreFactory nextflow.data.cid.CidObserverFactory - +nextflow.data.cid.cli.CidCommandImpl diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy new file mode 100644 index 0000000000..4f4f116a96 --- /dev/null +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy @@ -0,0 +1,50 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid + +import nextflow.data.config.DataConfig +import spock.lang.Specification +import spock.lang.Unroll + +/** + * + * @author Paolo Di Tommaso + */ +class DefaultCidStoreFactoryTest extends Specification { + + @Unroll + def 'should validate can open' () { + given: + def factory = new DefaultCidStoreFactory() + def config = new DataConfig(CONFIG) + + expect: + factory.canOpen(config) == EXPECTED + + where: + EXPECTED | CONFIG + true | [:] + true | [store:[location:'/some/path']] + true | [store:[location:'some/rel/path']] + true | [store:[location:'file:/this/that']] + true | [store:[location:'s3://some/path']] + false | [store:[location:'http://some/path']] + false | [store:[location:'jdbc:foo']] + } + +} diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy index aabd794096..a19f72f900 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy @@ -17,14 +17,12 @@ package nextflow.data.cid -import nextflow.data.config.DataConfig -import spock.lang.Specification -import spock.lang.TempDir - import java.nio.file.Files import java.nio.file.Path -import java.util.function.Consumer +import nextflow.data.config.DataConfig +import spock.lang.Specification +import spock.lang.TempDir /** * * @author Jorge Ejarque @@ -52,7 +50,6 @@ class DefaultCidStoreTest extends Specification { cidStore.open(config) def historyLog = 
cidStore.getHistoryLog() then: - cidStore.getPath() == storeLocation cidStore.getMetadataPath() == metaLocation historyLog != null historyLog instanceof CidHistoryFile @@ -94,4 +91,4 @@ class DefaultCidStoreTest extends Specification { expect: cidStore.load("nonexistentKey") == null } -} \ No newline at end of file +} diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy index e5170b6005..d03264133d 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy @@ -17,6 +17,7 @@ package nextflow.data.cid.fs +import nextflow.data.cid.DefaultCidStore import spock.lang.Shared import java.nio.ByteBuffer @@ -79,7 +80,7 @@ class CidFileSystemProviderTest extends Specification { when: def fs = provider.newFileSystem(cid, config) as CidFileSystem then: - fs.cidStore.path == data + (fs.cidStore as DefaultCidStore).location == data } def 'should get a file system' () { @@ -95,9 +96,9 @@ class CidFileSystemProviderTest extends Specification { when: provider.newFileSystem(uri, config) as CidFileSystem and: - def result = provider.getFileSystem(uri) as CidFileSystem + def fs = provider.getFileSystem(uri) as CidFileSystem then: - result.cidStore.path == data + (fs.cidStore as DefaultCidStore).location == data } def 'should get or create a file system' () { @@ -111,7 +112,7 @@ class CidFileSystemProviderTest extends Specification { when: def fs = provider.getFileSystemOrCreate(uri) as CidFileSystem then: - fs.cidStore.path == data + (fs.cidStore as DefaultCidStore).location == data when: def fs2 = provider.getFileSystemOrCreate(uri) as CidFileSystem diff --git a/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy b/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy index e75bcca7f3..5eff1c7103 100644 --- 
a/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy @@ -17,7 +17,6 @@ package nextflow.data.config -import java.nio.file.Path import spock.lang.Specification /** @@ -30,26 +29,23 @@ class DataConfigTest extends Specification { when: def config = new DataConfig(Map.of()) then: - config.store.location == Path.of('.').resolve('data').toAbsolutePath().normalize() - config.store.logLocation == null !config.enabled + !config.store.location } def 'should create default with enable' () { when: def config = new DataConfig([enabled: true]) then: - config.store.location == Path.of('.').resolve('data').toAbsolutePath().normalize() - config.store.logLocation == null config.enabled + !config.store.location } def 'should create data config with location' () { when: - def config = new DataConfig(enabled: true, store: [location: "/some/data/store", logLocation: "/some/data/.history"]) + def config = new DataConfig(enabled: true, store: [location: "/some/data/store"]) then: - config.store.location == Path.of("/some/data/store") - config.store.logLocation == Path.of("/some/data/.history") config.enabled + config.store.location == '/some/data/store' } } diff --git a/modules/nf-commons/src/main/nextflow/plugin/PluginsFacade.groovy b/modules/nf-commons/src/main/nextflow/plugin/PluginsFacade.groovy index 72c5927dbd..20386521d3 100644 --- a/modules/nf-commons/src/main/nextflow/plugin/PluginsFacade.groovy +++ b/modules/nf-commons/src/main/nextflow/plugin/PluginsFacade.groovy @@ -299,10 +299,12 @@ class PluginsFacade implements PluginStateListener { * The extension with higher priority appears first (lower index) */ def List getPriorityExtensions(Class type,String group=null) { - def result = getExtensions(type) + def extensions = getExtensions(type) if( group ) - result = result.findAll(it -> group0(it)==group ) - return result.sort( it -> priority0(it) ) + extensions = extensions.findAll(it -> 
group0(it)==group ) + final result = extensions.sort( it -> priority0(it) ) + log.debug "Discovered extensions for type ${type.getName()}: ${extensions.join(',')}" + return result } protected int priority0(Object it) { diff --git a/plugins/nf-cid-h2/build.gradle b/plugins/nf-cid-h2/build.gradle new file mode 100644 index 0000000000..9b9f35f227 --- /dev/null +++ b/plugins/nf-cid-h2/build.gradle @@ -0,0 +1,49 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +apply plugin: 'java' +apply plugin: 'java-test-fixtures' +apply plugin: 'idea' +apply plugin: 'groovy' + +sourceSets { + main.java.srcDirs = [] + main.groovy.srcDirs = ['src/main'] + main.resources.srcDirs = ['src/resources'] + test.groovy.srcDirs = ['src/test'] + test.java.srcDirs = [] + test.resources.srcDirs = [] +} + +configurations { + // see https://docs.gradle.org/4.1/userguide/dependency_management.html#sub:exclude_transitive_dependencies + runtimeClasspath.exclude group: 'org.slf4j', module: 'slf4j-api' +} + +dependencies { + compileOnly project(':nextflow') + compileOnly project(':nf-cid') + compileOnly 'org.slf4j:slf4j-api:2.0.16' + compileOnly 'org.pf4j:pf4j:3.12.0' + + api("com.h2database:h2:2.2.224") + api("com.zaxxer:HikariCP:5.0.1") + api("org.apache.groovy:groovy-sql:4.0.26") { transitive=false } + + testImplementation(project(':nf-cid')) + testImplementation(testFixtures(project(":nextflow"))) +} diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy new file mode 100644 index 0000000000..e112f0daea --- /dev/null +++ b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy @@ -0,0 +1,133 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.h2 + +import java.sql.Timestamp + +import com.zaxxer.hikari.HikariDataSource +import groovy.sql.GroovyRowResult +import groovy.sql.Sql +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.data.cid.CidHistoryLog +import nextflow.data.cid.CidHistoryRecord + +/** + * Implement a {@link CidHistoryLog} based on H2 database + * + * @author Paolo Di Tommaso + */ +@Slf4j +@CompileStatic +class H2CidHistoryLog implements CidHistoryLog { + + private HikariDataSource dataSource + + H2CidHistoryLog(HikariDataSource dataSource) { + this.dataSource = dataSource + } + + @Override + void write(String name, UUID sessionId, String runCid, String resultsCid) { + try(final sql=new Sql(dataSource)) { + def query = """ + INSERT INTO cid_history_record (timestamp, run_name, session_id, run_cid, results_cid) + VALUES (?, ?, ?, ?, ?) + """ + def timestamp = new Timestamp(System.currentTimeMillis()) // Current timestamp + sql.executeInsert(query, List.of(timestamp, name, sessionId.toString(), runCid, resultsCid)) + } + } + + @Override + void updateRunCid(UUID sessionId, String runCid) { + try(final sql=new Sql(dataSource)) { + def query = """ + UPDATE cid_history_record + SET run_cid = ? + WHERE session_id = ? + """ + + final count = sql.executeUpdate(query, List.of(runCid, sessionId.toString())) + if (count > 0) { + log.debug "Successfully updated run_cid for session_id: $sessionId" + } + else { + log.warn "No record found with session_id: $sessionId" + } + } + } + + @Override + void updateResultsCid(UUID sessionId, String resultsCid) { + try(final sql=new Sql(dataSource)) { + def query = """ + UPDATE cid_history_record + SET results_cid = ? + WHERE session_id = ?
+ """ + + final count = sql.executeUpdate(query, List.of(resultsCid, sessionId.toString())) + if (count > 0) { + log.debug "Successfully updated results_cid for session_id: $sessionId" // this statement sets results_cid, not run_cid + } + else { + log.warn "No record found with session_id: $sessionId" + } + } + } + + @Override + List getRecords() { + try(final sql=new Sql(dataSource)) { + final result = new ArrayList(100) + final query = "SELECT * FROM cid_history_record " + final rows = sql.rows(query) + for( GroovyRowResult row : rows ) { + result.add( + new CidHistoryRecord( + row.timestamp as Date, + row.run_name as String, + UUID.fromString(row.session_id as String), + row.run_cid as String, + row.results_cid as String + ) + ) + } + return result + } + } + + @Override + CidHistoryRecord getRecord(UUID sessionId) { + try(final sql=new Sql(dataSource)) { + final query = "SELECT * FROM cid_history_record WHERE session_id = ?" + final row = sql.firstRow(query, sessionId.toString()) // Convert UUID to String for query + if( !row ) + return null + return new CidHistoryRecord( + row.timestamp as Date, + row.run_name as String, + UUID.fromString(row.session_id as String), + row.run_cid as String, + row.results_cid as String + ) + } + } + +} diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidPlugin.groovy b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidPlugin.groovy new file mode 100644 index 0000000000..b1ae33d595 --- /dev/null +++ b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidPlugin.groovy @@ -0,0 +1,35 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.h2 + +import groovy.transform.CompileStatic +import nextflow.plugin.BasePlugin +import org.pf4j.PluginWrapper + +/** + * Implements plugin entry class for H2 db based CID store + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class H2CidPlugin extends BasePlugin{ + + H2CidPlugin(PluginWrapper wrapper) { + super(wrapper) + } +} diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy new file mode 100644 index 0000000000..66239e08af --- /dev/null +++ b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy @@ -0,0 +1,141 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.h2 + + +import java.sql.Clob + +import com.zaxxer.hikari.HikariDataSource +import groovy.sql.Sql +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.data.cid.CidHistoryLog +import nextflow.data.cid.CidStore +import nextflow.data.config.DataConfig +import nextflow.data.config.DataStoreOpts +import nextflow.util.TestOnly +/** + * + * @author Paolo Di Tommaso + */ +@Slf4j +@CompileStatic +class H2CidStore implements CidStore { + + private HikariDataSource dataSource + + @Override + H2CidStore open(DataConfig config) { + assert config.store.location.startsWith('jdbc:h2:') + log.info "Connecting CID H2 store: '${config.store.location}'" + dataSource = createDataSource(config.store) + // create the db tables + createDbTables(dataSource) + return this + } + + static HikariDataSource createDataSource(DataStoreOpts store) { + final result = new HikariDataSource() + result.jdbcUrl = store.location + result.driverClassName = 'org.h2.Driver' + result.username = 'sa' + result.password = '' + result.maximumPoolSize = 10 + return result + } + + static void createDbTables(HikariDataSource dataSource) { + // create DDL is missing + try(final sql=new Sql(dataSource)) { + sql.execute(''' + CREATE TABLE IF NOT EXISTS cid_file ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + path VARCHAR UNIQUE NOT NULL, + metadata CLOB NOT NULL + ); + + CREATE TABLE IF NOT EXISTS cid_file_tag ( + file_id BIGINT NOT NULL, + tags TEXT NOT NULL, + PRIMARY KEY (file_id), + FOREIGN KEY (file_id) REFERENCES cid_file(id) ON DELETE CASCADE + ); + + CREATE TABLE IF NOT EXISTS cid_history_record ( + id IDENTITY PRIMARY KEY, -- Auto-increment primary key + timestamp TIMESTAMP NOT NULL, + run_name VARCHAR(255) NOT NULL, + session_id UUID NOT NULL, + run_cid VARCHAR(255) NOT NULL, + results_cid VARCHAR(255) NOT NULL, + UNIQUE (run_name, session_id) -- Enforce uniqueness constraint + ); + ''') + } + } + + @Override + void save(String key, Object 
value) { + try(final sql=new Sql(dataSource)) { + sql.execute(""" + INSERT INTO cid_file (path, metadata) VALUES (?, ?) + """, [key, value]) + } + } + + @Override + Object load(String key) { + try(final sql=new Sql(dataSource)) { + final result = sql.firstRow("SELECT metadata FROM cid_file WHERE path = ?", List.of(key)) + return result ? toValue(result.metadata) : null + } + } + + protected Object toValue(Object obj) { + return obj instanceof Clob + ? obj.characterStream.text + : obj + } + + @Override + CidHistoryLog getHistoryLog() { + return new H2CidHistoryLog(dataSource) + } + + @Override + void close() { + dataSource.close() + } + + @TestOnly + void truncateAllTables() { + try(final sql=new Sql(dataSource)) { + println "Truncating all tables..." + sql.execute("SET REFERENTIAL_INTEGRITY FALSE") // Disable foreign key constraints + + def tables = sql.rows("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = 'PUBLIC'") + tables.each { table -> + final stm = "TRUNCATE TABLE ${table.TABLE_NAME}" as String + sql.execute(stm) // Truncate each table + } + + sql.execute("SET REFERENTIAL_INTEGRITY TRUE") // Re-enable constraints + println "All tables truncated successfully" + } + } +} diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStoreFactory.groovy b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStoreFactory.groovy new file mode 100644 index 0000000000..f32eb1004d --- /dev/null +++ b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStoreFactory.groovy @@ -0,0 +1,41 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.h2 + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.data.cid.CidStore +import nextflow.data.cid.CidStoreFactory +import nextflow.data.config.DataConfig +import nextflow.plugin.Priority + +@Slf4j +@CompileStatic +@Priority(-10) // <-- lower is higher, this is needed to override default provider behavior +class H2CidStoreFactory extends CidStoreFactory { + + @Override + boolean canOpen(DataConfig config) { + return config.store.location != null && config.store.location.startsWith('jdbc:h2:') // location is null when not configured: decline so the default factory is selected + } + + @Override + protected CidStore newInstance(DataConfig config) { + return new H2CidStore().open(config) + } +} diff --git a/plugins/nf-cid-h2/src/resources/META-INF/MANIFEST.MF b/plugins/nf-cid-h2/src/resources/META-INF/MANIFEST.MF new file mode 100644 index 0000000000..9eab0f2267 --- /dev/null +++ b/plugins/nf-cid-h2/src/resources/META-INF/MANIFEST.MF @@ -0,0 +1,6 @@ +Manifest-Version: 1.0 +Plugin-Class: nextflow.data.cid.h2.H2CidPlugin +Plugin-Id: nf-cid-h2 +Plugin-Version: 0.1.0 +Plugin-Provider: Seqera Labs +Plugin-Requires: >=25.01.0-edge diff --git a/modules/nf-cid/src/resources/META-INF/services/nextflow.cli.CmdCid$CidOperation b/plugins/nf-cid-h2/src/resources/META-INF/extensions.idx similarity index 89% rename from modules/nf-cid/src/resources/META-INF/services/nextflow.cli.CmdCid$CidOperation rename to plugins/nf-cid-h2/src/resources/META-INF/extensions.idx index afee4231e0..b61797ca96 100644 --- a/modules/nf-cid/src/resources/META-INF/services/nextflow.cli.CmdCid$CidOperation +++
b/plugins/nf-cid-h2/src/resources/META-INF/extensions.idx @@ -13,6 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# -nextflow.data.cid.operation.CidOperationImpl +nextflow.data.cid.h2.H2CidPlugin +nextflow.data.cid.h2.H2CidStoreFactory diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy new file mode 100644 index 0000000000..43bd6f527e --- /dev/null +++ b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy @@ -0,0 +1,145 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.h2 + +import nextflow.data.config.DataConfig +import spock.lang.Shared +import spock.lang.Specification + +/** + * + * @author Paolo Di Tommaso + */ +class H2CidHistoryLogTest extends Specification { + + @Shared + H2CidStore store + + def setupSpec() { + def uri = "jdbc:h2:mem:testdb;DB_CLOSE_DELAY=-1" + def config = new DataConfig([store:[location:uri]]) + store = new H2CidStore().open(config) + } + + def cleanupSpec() { + store.close() + } + + def cleanup() { + store.truncateAllTables() + } + + def 'should write cid record' () { + given: + def log = store.getHistoryLog() + def uuid = UUID.randomUUID() + when: + log.write('foo', uuid, '1234', '4321') + then: + noExceptionThrown() + + when: + def rec = log.getRecord(uuid) + then: + rec.runName == 'foo' + rec.sessionId == uuid + rec.runCid == '1234' + rec.resultsCid == '4321' + } + + def 'should update run cid' () { + given: + def log = store.getHistoryLog() + def uuid = UUID.randomUUID() + when: + log.write('foo', uuid, '1234', '4321') + then: + noExceptionThrown() + + when: + log.updateRunCid(uuid, '4444') + then: + noExceptionThrown() + + when: + def rec = log.getRecord(uuid) + then: + rec.runName == 'foo' + rec.sessionId == uuid + rec.runCid == '4444' + rec.resultsCid == '4321' + } + + def 'should update results cid' () { + given: + def log = store.getHistoryLog() + def uuid = UUID.randomUUID() + when: + log.write('foo', uuid, '1234', '4321') + then: + noExceptionThrown() + + when: + log.updateResultsCid(uuid, '5555') + then: + noExceptionThrown() + + when: + def rec = log.getRecord(uuid) + then: + rec.runName == 'foo' + rec.sessionId == uuid + rec.runCid == '1234' + rec.resultsCid == '5555' + } + + def 'should update get records' () { + given: + def log = store.getHistoryLog() + def uuid1 = UUID.randomUUID() + def uuid2 = UUID.randomUUID() + def uuid3 = UUID.randomUUID() + when: + log.write('foo1', uuid1, '1', '11') + log.write('foo2', uuid2, '2', '22') + log.write('foo3', 
uuid3, '3', '33') + then: + noExceptionThrown() + + when: + def all = log.getRecords() + then: + all.size()==3 + and: + all[0].runName == 'foo1' + all[0].sessionId == uuid1 + all[0].runCid == '1' + all[0].resultsCid == '11' + and: + all[1].runName == 'foo2' + all[1].sessionId == uuid2 + all[1].runCid == '2' + all[1].resultsCid == '22' + and: + all[2].runName == 'foo3' + all[2].sessionId == uuid3 + all[2].runCid == '3' + all[2].resultsCid == '33' + } + +} diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy new file mode 100644 index 0000000000..601840e635 --- /dev/null +++ b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy @@ -0,0 +1,50 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.data.cid.h2 + +import nextflow.data.config.DataConfig +import spock.lang.Shared +import spock.lang.Specification + +/** + * + * @author Paolo Di Tommaso + */ +class H2CidStoreTest extends Specification { + + @Shared + H2CidStore store + + def setupSpec() { + def uri = "jdbc:h2:mem:testdb;DB_CLOSE_DELAY=-1" + def config = new DataConfig([store:[location:uri]]) + store = new H2CidStore().open(config) + } + + def cleanupSpec() { + store.close() + } + + def 'should store and get a value' () { + when: + store.save('/some/key', 'Hello world') + then: + store.load('/some/key') == 'Hello world' + } + +} diff --git a/settings.gradle b/settings.gradle index cdc1a07de8..21543ddc7b 100644 --- a/settings.gradle +++ b/settings.gradle @@ -41,3 +41,4 @@ include 'plugins:nf-azure' include 'plugins:nf-codecommit' include 'plugins:nf-wave' include 'plugins:nf-cloudcache' +include 'plugins:nf-cid-h2' From db79c430a21e8a7a17fb5b8f0306195f176524c3 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Mon, 24 Mar 2025 11:46:40 +0100 Subject: [PATCH 29/72] Add serde interfaces (#5893) [ci skip] Signed-off-by: Paolo Di Tommaso Signed-off-by: jorgee Co-authored-by: jorgee --- .../groovy/nextflow/cli/CmdCidTest.groovy | 73 ++-- .../main/nextflow/data/cid/CidObserver.groovy | 81 ++--- .../main/nextflow/data/cid/CidStore.groovy | 6 +- .../nextflow/data/cid/DefaultCidStore.groovy | 19 +- .../data/cid/DefaultCidStoreFactory.groovy | 2 +- .../data/cid/cli/CidCommandImpl.groovy | 54 +-- .../main/nextflow/data/cid/fs/CidPath.groovy | 15 +- .../nextflow/data/cid/model/Output.groovy | 5 +- .../nextflow/data/cid/model/TaskOutput.groovy | 32 ++ .../nextflow/data/cid/model/TaskRun.groovy | 4 +- .../nextflow/data/cid/model/Workflow.groovy | 4 +- .../data/cid/model/WorkflowOutput.groovy | 32 ++ .../data/cid/model/WorkflowResults.groovy | 6 +- .../data/cid/model/WorkflowRun.groovy | 4 +- .../nextflow/data/cid/serde/CidEncoder.groovy | 49 +++ 
.../data/cid/serde/CidSerializable.groovy | 29 ++ .../nextflow/data/cid/CidObserverTest.groovy | 104 ++---- .../data/cid/DefaultCidStoreTest.groovy | 11 +- .../data/cid/serde/CidEncoderTest.groovy | 31 ++ .../src/main/nextflow/serde/Encoder.groovy | 45 +++ .../nextflow/serde/JsonSerializable.groovy} | 11 +- .../serde/gson/GStringSerializer.groovy | 39 ++ .../nextflow/serde/gson/GsonEncoder.groovy | 93 +++++ .../gson/InstantAdapter.groovy} | 7 +- .../serde/gson/OffsetDateTimeAdapter.groovy | 43 +++ .../serde/gson/RuntimeTypeAdapterFactory.java | 342 ++++++++++++++++++ .../src/main/nextflow/util/GsonHelper.groovy | 8 +- .../src/main/nextflow/util/TypeHelper.groovy | 58 +++ .../nextflow/serde/GsonEncoderTest.groovy | 81 +++++ .../src/test/nextflow/serde/MyEncoder.groovy | 69 ++++ .../nextflow/data/cid/h2/H2CidStore.groovy | 14 +- .../data/cid/h2/H2CidStoreTest.groovy | 9 +- 32 files changed, 1149 insertions(+), 231 deletions(-) create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutput.groovy create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/serde/CidSerializable.groovy create mode 100644 modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy create mode 100644 modules/nf-commons/src/main/nextflow/serde/Encoder.groovy rename modules/{nf-cid/src/main/nextflow/data/cid/model/DataType.groovy => nf-commons/src/main/nextflow/serde/JsonSerializable.groovy} (75%) create mode 100644 modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy create mode 100644 modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy rename modules/nf-commons/src/main/nextflow/{util/GsonInstantAdapter.groovy => serde/gson/InstantAdapter.groovy} (90%) create mode 100644 modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy create mode 
100644 modules/nf-commons/src/main/nextflow/serde/gson/RuntimeTypeAdapterFactory.java create mode 100644 modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy create mode 100644 modules/nf-commons/src/test/nextflow/serde/GsonEncoderTest.groovy create mode 100644 modules/nf-commons/src/test/nextflow/serde/MyEncoder.groovy diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy index f314af72c6..c8f31eed34 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -17,20 +17,22 @@ package nextflow.cli -import groovy.json.JsonOutput - import java.nio.file.Files import nextflow.SysEnv import nextflow.dag.MermaidHtmlRenderer import nextflow.data.cid.CidHistoryRecord import nextflow.data.cid.CidStoreFactory +import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.TaskRun +import nextflow.data.cid.model.WorkflowOutput +import nextflow.data.cid.serde.CidEncoder import nextflow.plugin.Plugins - import org.junit.Rule import spock.lang.Specification import test.OutputCapture - /** * CLI cid Tests * @@ -130,16 +132,12 @@ class CmdCidTest extends Specification { def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } - - def recordEntry = JsonOutput.prettyPrint('{"type":"WorkflowOutput",' + - '"path":"/path/to/file",' + - '"checksum":"45372qe",' + - '"source":"cid://123987/file.bam",' + - '"size": 1234,' + - '"createdAt": 123456789,' + - '"modifiedAt": 123456789,' + - '"annotations":null}') - cidFile.text = recordEntry + def encoder = new CidEncoder().withPrettyPrint(true) + def entry = new WorkflowOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", 1234, 123456789, 123456789, null) + def jsonSer = 
encoder.encode(entry) + def expectedOutput = jsonSer + cidFile.text = jsonSer when: def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid://12345"]) cidCmd.run() @@ -151,8 +149,8 @@ class CmdCidTest extends Specification { .findResults { line -> !line.contains('plugin') ? line : null } then: - stdout.size() == recordEntry.readLines().size() - stdout.join('\n') == recordEntry + stdout.size() == expectedOutput.readLines().size() + stdout.join('\n') == expectedOutput cleanup: folder?.deleteDir() @@ -204,31 +202,24 @@ class CmdCidTest extends Specification { Files.createDirectories(cidFile3.parent) Files.createDirectories(cidFile4.parent) Files.createDirectories(cidFile5.parent) - - def recordEntry = JsonOutput.prettyPrint('{"type":"WorkflowOutput",' + - '"path":"/path/to/file","checksum":"45372qe","source":"cid://123987/file.bam",' + - '"size": 1234,"createdAt": 123456789, "modifiedAt": 123456789,"annotations":null}') - cidFile.text = recordEntry - recordEntry = JsonOutput.prettyPrint('{"type":"TaskOutput",' + - '"path":"/path/to/file","checksum":"45372qe","source":"cid://123987",' + - '"size": 1234,"createdAt": 123456789,"modifiedAt": 123456789,"annotations":null}') - cidFile2.text = recordEntry - recordEntry = JsonOutput.prettyPrint('{"type":"TaskRun",' + - '"sessionId":"u345-2346-1stw2", "name":"foo","code":"abcde2345",' + - '"inputs": [{"type": "ValueInParam","name": "sample_id","value": "ggal_gut"},' + - '{"type": "FileInParam","name": "reads","value": ["cid://45678/output.txt"]}],' + - '"container": null,"conda": null,"spack": null,"architecture": null,' + - '"globalVars": {},"binEntries": [],"annotations":null}') - cidFile3.text = recordEntry - recordEntry = JsonOutput.prettyPrint('{"type":"TaskOutput",' + - '"path":"/path/to/file","checksum":"45372qe","source":"cid://45678",' + - '"size": 1234,"createdAt": 123456789,"modifiedAt": 123456789,"annotations":null}') - cidFile4.text = recordEntry - recordEntry = JsonOutput.prettyPrint('{"type":"TaskRun",' 
+ - '"sessionId":"u345-2346-1stw2", "name":"bar","code":"abfs2556",' + - '"inputs": null,"container": null,"conda": null,"spack": null,"architecture": null,' + - '"globalVars": {},"binEntries": [],"annotations":null}') - cidFile5.text = recordEntry + def encoder = new CidEncoder() + def entry = new WorkflowOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", 1234, 123456789, 123456789, null) + cidFile.text = encoder.encode(entry) + entry = new TaskOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987", 1234, 123456789, 123456789, null) + cidFile2.text = encoder.encode(entry) + entry = new TaskRun("u345-2346-1stw2", "foo", new Checksum("abcde2345","nextflow","standard"), + [new Parameter( "ValueInParam", "sample_id","ggal_gut"), + new Parameter("FileInParam","reads",["cid://45678/output.txt"])], + null, null, null, null, [:],[], null) + cidFile3.text = encoder.encode(entry) + entry = new TaskOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://45678", 1234, 123456789, 123456789, null) + cidFile4.text = encoder.encode(entry) + entry = new TaskRun("u345-2346-1stw2", "bar", new Checksum("abfs2556","nextflow","standard"), + null,null, null, null, null, [:],[], null) + cidFile5.text = encoder.encode(entry) final network = """flowchart BT cid://12345/file.bam@{shape: document, label: "cid://12345/file.bam"} cid://123987/file.bam@{shape: document, label: "cid://123987/file.bam"} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index 9f694640aa..9182112e16 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -17,40 +17,38 @@ package nextflow.data.cid +import static nextflow.data.cid.fs.CidPath.* + +import java.nio.file.Files +import java.nio.file.Path +import 
java.nio.file.attribute.BasicFileAttributes + +import groovy.transform.CompileStatic import groovy.util.logging.Slf4j +import nextflow.Session import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.WorkflowResults +import nextflow.data.cid.model.TaskOutput import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowOutput +import nextflow.data.cid.model.WorkflowResults import nextflow.data.cid.model.WorkflowRun +import nextflow.data.cid.serde.CidEncoder import nextflow.file.FileHelper import nextflow.file.FileHolder +import nextflow.processor.TaskHandler +import nextflow.processor.TaskRun import nextflow.script.ScriptMeta import nextflow.script.params.DefaultInParam import nextflow.script.params.FileInParam -import nextflow.script.params.InParam -import nextflow.util.PathNormalizer -import nextflow.util.TestOnly - -import java.nio.file.Files -import java.nio.file.Path -import java.nio.file.attribute.BasicFileAttributes - -import groovy.json.JsonOutput -import groovy.transform.CompileStatic -import nextflow.Session -import nextflow.data.cid.model.DataType -import nextflow.data.cid.model.Output -import nextflow.processor.TaskHandler -import nextflow.processor.TaskRun import nextflow.script.params.FileOutParam +import nextflow.script.params.InParam import nextflow.trace.TraceObserver import nextflow.trace.TraceRecord import nextflow.util.CacheHelper - -import static nextflow.data.cid.fs.CidPath.CID_PROT - +import nextflow.util.PathNormalizer +import nextflow.util.TestOnly /** * Observer to write the generated workflow metadata in a CID store. 
* @@ -65,6 +63,7 @@ class CidObserver implements TraceObserver { private Session session private WorkflowResults workflowResults private Map outputsStoreDirCid = new HashMap(10) + private CidEncoder encoder = new CidEncoder() CidObserver(Session session, CidStore store){ this.session = session @@ -83,18 +82,17 @@ class CidObserver implements TraceObserver { void onFlowBegin() { this.executionHash = storeWorkflowRun() workflowResults = new WorkflowResults( - DataType.WorkflowResults, "$CID_PROT${executionHash}", - new ArrayList()) + new ArrayList()) this.store.getHistoryLog().updateRunCid(session.uniqueId, "${CID_PROT}${this.executionHash}") } @Override void onFlowComplete(){ if (this.workflowResults){ - final content = JsonOutput.prettyPrint(JsonOutput.toJson(workflowResults)) - final wfResultsHash = CacheHelper.hasher(content).hash().toString() - this.store.save(wfResultsHash, content) + final json = encoder.encode(workflowResults) + final wfResultsHash = CacheHelper.hasher(json).hash().toString() + this.store.save(wfResultsHash, workflowResults) this.store.getHistoryLog().updateResultsCid(session.uniqueId, "${CID_PROT}${wfResultsHash}") } } @@ -121,23 +119,21 @@ class CidObserver implements TraceObserver { } } final workflow = new Workflow( - DataType.Workflow, mainScript, otherScripts, session.workflowMetadata.repository, session.workflowMetadata.commitId ) final value = new WorkflowRun( - DataType.WorkflowRun, workflow, session.uniqueId.toString(), session.runName, getNormalizedParams(session.params, normalizer) ) - final content = JsonOutput.prettyPrint(JsonOutput.toJson(value)) - final executionHash = CacheHelper.hasher(content).hash().toString() - store.save(executionHash, content) + final json = encoder.encode(value) + final executionHash = CacheHelper.hasher(json).hash().toString() + store.save(executionHash, value) return executionHash } @@ -184,7 +180,6 @@ class CidObserver implements TraceObserver { final codeChecksum = new 
Checksum(CacheHelper.hasher(session.stubRun ? task.stubSource: task.source).hash().toString(), "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) final value = new nextflow.data.cid.model.TaskRun( - DataType.TaskRun, session.uniqueId.toString(), task.getName(), codeChecksum, @@ -203,7 +198,7 @@ class CidObserver implements TraceObserver { // store in the underlying persistence final key = task.hash.toString() - store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + store.save(key, value) return key } @@ -215,15 +210,14 @@ class CidObserver implements TraceObserver { final key = cid.toString() final checksum = new Checksum( CacheHelper.hasher(path).hash().toString(), "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase() ) - final value = new Output( - DataType.TaskOutput, + final value = new TaskOutput( path.toUriString(), checksum, "$CID_PROT$task.hash", attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis()) - store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) + store.save(key, value) } catch (Throwable e) { log.warn("Exception storing CID output $path for task ${task.name}. 
${e.getLocalizedMessage()}") } @@ -273,20 +267,20 @@ class CidObserver implements TraceObserver { CacheHelper.HashMode.DEFAULT().toString().toLowerCase() ) final rel = getWorkflowRelative(destination) - final key = "$executionHash/${rel}" + final key = "$executionHash/${rel}" as String final sourceReference = getSourceReference(source) final attrs = readAttributes(destination) - final value = new Output( - DataType.WorkflowOutput, + final value = new WorkflowOutput( destination.toUriString(), checksum, sourceReference, attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis()) - store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) - workflowResults.outputs.add("${CID_PROT}${key}") - } catch (Throwable e) { + store.save(key, value) + workflowResults.outputs.add("${CID_PROT}${key}".toString()) + } + catch (Throwable e) { log.warn("Exception storing CID output $destination for workflow ${executionHash}.", e) } } @@ -315,16 +309,15 @@ class CidObserver implements TraceObserver { final rel = getWorkflowRelative(destination) final key = "$executionHash/${rel}" final attrs = readAttributes(destination) - final value = new Output( - DataType.WorkflowOutput, + final value = new WorkflowOutput( destination.toUriString(), checksum, "${CID_PROT}${executionHash}".toString(), attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis()) - store.save(key, JsonOutput.prettyPrint(JsonOutput.toJson(value))) - workflowResults.outputs.add("${CID_PROT}${key}") + store.save(key, value) + workflowResults.outputs.add("${CID_PROT}${key}" as String) }catch (Throwable e) { log.warn("Exception storing CID output $destination for workflow ${executionHash}. 
${e.getLocalizedMessage()}") } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy index 56103f95c1..058cad7d47 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy @@ -17,8 +17,8 @@ package nextflow.data.cid - import groovy.transform.CompileStatic +import nextflow.data.cid.serde.CidSerializable import nextflow.data.config.DataConfig /** * Interface for the CID store @@ -38,14 +38,14 @@ interface CidStore extends Closeable { * @param key Entry key. * @param value Entry object. */ - void save(String key, Object value) + void save(String key, CidSerializable value) /** * Load an entry for a given CID key. * @param key CID key. * @return entry value, or null if key does not exists */ - Object load(String key) + CidSerializable load(String key) /** * Get the {@link CidHistoryLog} object associated to the CidStore. diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy index 13ed3c840f..691b4a8e1c 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy @@ -17,17 +17,17 @@ package nextflow.data.cid -import nextflow.file.FileHelper -import nextflow.util.TestOnly - import java.nio.file.Files import java.nio.file.Path import groovy.transform.CompileStatic import groovy.util.logging.Slf4j +import nextflow.data.cid.serde.CidEncoder +import nextflow.data.cid.serde.CidSerializable import nextflow.data.config.DataConfig import nextflow.exception.AbortOperationException - +import nextflow.file.FileHelper +import nextflow.util.TestOnly /** * Default Implementation for the a CID store. 
* @@ -44,10 +44,13 @@ class DefaultCidStore implements CidStore { private Path metaLocation private Path location private CidHistoryLog historyLog + private CidEncoder encoder + DefaultCidStore open(DataConfig config) { location = toLocationPath(config.store.location) metaLocation = location.resolve(METADATA_PATH) + encoder = new CidEncoder() if( !Files.exists(metaLocation) && !Files.createDirectories(metaLocation) ) { throw new AbortOperationException("Unable to create CID store directory: $metaLocation") } @@ -62,19 +65,19 @@ class DefaultCidStore implements CidStore { } @Override - void save(String key, Object value) { + void save(String key, CidSerializable value) { final path = metaLocation.resolve("$key/$METADATA_FILE") Files.createDirectories(path.parent) log.debug "Save CID file path: $path" - path.text = value + path.text = encoder.encode(value) } @Override - Object load(String key) { + CidSerializable load(String key) { final path = metaLocation.resolve("$key/$METADATA_FILE") log.debug("Loading from path $path") if (path.exists()) - return path.text + return encoder.decode(path.text) log.debug("File for key $key not found") return null } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy index 0a2d73d261..88eeaf41a3 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy @@ -23,7 +23,7 @@ import nextflow.data.config.DataConfig import nextflow.plugin.Priority /** - * Default Factory for CidStore + * Default Factory for CidStore. 
* * @author Jorge Ejarque */ diff --git a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy index 293aa3789f..821b7a76e9 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy @@ -21,7 +21,6 @@ import static nextflow.data.cid.fs.CidPath.* import java.nio.file.Path -import groovy.json.JsonSlurper import groovy.transform.Canonical import groovy.transform.CompileStatic import nextflow.Session @@ -31,7 +30,14 @@ import nextflow.dag.MermaidHtmlRenderer import nextflow.data.cid.CidHistoryRecord import nextflow.data.cid.CidStore import nextflow.data.cid.CidStoreFactory -import nextflow.data.cid.model.DataType +import nextflow.data.cid.model.Output +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.TaskRun +import nextflow.data.cid.model.WorkflowOutput +import nextflow.data.cid.model.WorkflowRun +import nextflow.data.cid.serde.CidEncoder +import nextflow.script.params.FileInParam import nextflow.ui.TableBuilder /** * Implements CID command line operations @@ -83,15 +89,16 @@ class CidCommandImpl implements CmdCid.CidCommand { throw new Exception("Identifier is not a CID URL") final key = args[0].substring(CID_PROT.size()) final store = CidStoreFactory.getOrCreate(new Session(config)) + final encoder = new CidEncoder().withPrettyPrint(true) if (store) { try { final entry = store.load(key) if( entry ) - println entry.toString() + println encoder.encode(entry) else println "No entry found for ${args[0]}." } catch (Throwable e) { - println "Error loading ${args[0]}." + println "Error loading ${args[0]}. ${e.message}" } } else { println "Error CID store not loaded. Check Nextflow configuration." 
@@ -132,14 +139,13 @@ class CidCommandImpl implements CmdCid.CidCommand { private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, CidStore store) { if (!nodeToRender.startsWith(CID_PROT)) throw new Exception("Identifier is not a CID URL") - final slurper = new JsonSlurper() final key = nodeToRender.substring(CID_PROT.size()) - final cidObject = slurper.parse(store.load(key).toString().toCharArray()) as Map - switch (DataType.valueOf(cidObject.type as String)) { - case DataType.TaskOutput: - case DataType.WorkflowOutput: + final cidObject = store.load(key) + switch (cidObject.getClass()) { + case TaskOutput: + case WorkflowOutput: lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); - final source = cidObject.source as String + final source = (cidObject as Output).source if (source) { if (source.startsWith(CID_PROT)) { nodes.add(source) @@ -150,22 +156,25 @@ class CidCommandImpl implements CmdCid.CidCommand { edges.add(new Edge(source, nodeToRender)) } } - break; - case DataType.WorkflowRun: - lines << "${nodeToRender}@{shape: processes, label: \"${cidObject.runName}\"}".toString() - final parameters = cidObject.params as List + break /* output nodes must not fall through into the WorkflowRun case */ + case WorkflowRun: + final wfRun = cidObject as WorkflowRun + lines << "${nodeToRender}@{shape: processes, label: \"${wfRun.name}\"}".toString() + final parameters = wfRun.params parameters.each { final label = convertToLabel(it.value.toString()) lines << " ${it.value.toString()}@{shape: document, label: \"${label}\"}".toString(); edges.add(new Edge(it.value.toString(), nodeToRender)) } - break; - case DataType.TaskRun: - lines << " ${nodeToRender}@{shape: process, label: \"${cidObject.name}\"}".toString() - final parameters = cidObject.inputs as List - for (nextflow.data.cid.model.Parameter source: parameters){ - if (source.type.equals(nextflow.script.params.FileInParam.simpleName)) { + break + + case TaskRun: + final taskRun = cidObject as TaskRun + lines << " 
${nodeToRender}@{shape: process, label: \"${taskRun.name}\"}".toString() + final parameters = taskRun.inputs + for (Parameter source: parameters){ + if (source.type.equals(FileInParam.simpleName)) { manageFileInParam(lines, nodeToRender, nodes, edges, source.value) } else { final label = convertToLabel(source.value.toString()) @@ -173,9 +182,10 @@ class CidCommandImpl implements CmdCid.CidCommand { edges.add(new Edge(source.value.toString(), nodeToRender)) } } - break; + break + default: - throw new Exception("Unrecognized type reference ${cidObject.type}") + throw new Exception("Unrecognized type reference ${cidObject.getClass().getSimpleName()}") } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy index f9370843f4..a12b31152c 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy @@ -17,9 +17,8 @@ package nextflow.data.cid.fs -import groovy.json.JsonSlurper import groovy.util.logging.Slf4j -import nextflow.data.cid.model.DataType +import nextflow.data.cid.model.Output import nextflow.file.RealPathAware import nextflow.util.CacheHelper import nextflow.util.TestOnly @@ -72,13 +71,13 @@ class CidPath implements Path, RealPathAware { this.filePath = resolve0(fs, norm0(path), norm0(more)) } - private static void validateHash(Map cidObject) { + private static void validateHash(Output cidObject) { final hashedPath = FileHelper.toCanonicalPath(cidObject.path as String) if( !hashedPath.exists() ) throw new FileNotFoundException("Target path $cidObject.path does not exists.") if( cidObject.checksum ) { - final checksum = cidObject.checksum as Map - if( checksum.algorithm as String in SUPPORTED_CHECKSUM_ALGORITHMS ){ + final checksum = cidObject.checksum + if( checksum.algorithm in SUPPORTED_CHECKSUM_ALGORITHMS ){ final hash = checksum.mode ? 
CacheHelper.hasher(hashedPath,CacheHelper.HashMode.of(checksum.mode.toString().toLowerCase())).hash().toString() @@ -106,12 +105,10 @@ class CidPath implements Path, RealPathAware { final store = fs.getCidStore() if( !store ) throw new Exception("CID store not found. Check Nextflow configuration.") - final slurper = new JsonSlurper() final object = store.load(filePath) if ( object ){ - final cidObject = slurper.parse(object.toString().toCharArray()) as Map - final type = DataType.valueOf(cidObject.type as String) - if( type == DataType.TaskOutput || type == DataType.WorkflowOutput ) { + if( object instanceof Output ) { + final cidObject = object as Output // return the real path stored in the metadata validateHash(cidObject) def realPath = FileHelper.toCanonicalPath(cidObject.path as String) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy index cf4bd37d04..6b17b78485 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy @@ -19,15 +19,16 @@ package nextflow.data.cid.model import groovy.transform.Canonical import groovy.transform.CompileStatic +import nextflow.data.cid.serde.CidSerializable /** + * Model a base class for workflow and task outputs * * @author Paolo Di Tommaso */ @Canonical @CompileStatic -class Output { - DataType type +abstract class Output implements CidSerializable { String path Checksum checksum String source diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutput.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutput.groovy new file mode 100644 index 0000000000..f0f2828dff --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutput.groovy @@ -0,0 +1,32 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic +import groovy.transform.InheritConstructors + +/** + * Model a task output object + * + * @author Paolo Di Tommaso + */ +@Canonical +@CompileStatic +@InheritConstructors +class TaskOutput extends Output { +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy index c485fdc652..e6b9fbdadc 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy @@ -19,6 +19,7 @@ package nextflow.data.cid.model import groovy.transform.Canonical import groovy.transform.CompileStatic +import nextflow.data.cid.serde.CidSerializable /** * Models a task execution. 
@@ -27,8 +28,7 @@ import groovy.transform.CompileStatic */ @Canonical @CompileStatic -class TaskRun { - DataType type +class TaskRun implements CidSerializable { String sessionId String name Checksum codeChecksum diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy index c4b8824db4..3688b879f7 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy @@ -19,6 +19,7 @@ package nextflow.data.cid.model import groovy.transform.Canonical import groovy.transform.CompileStatic +import nextflow.data.cid.serde.CidSerializable /** @@ -28,8 +29,7 @@ import groovy.transform.CompileStatic */ @Canonical @CompileStatic -class Workflow { - DataType type +class Workflow implements CidSerializable { DataPath mainScriptFile List otherScriptFiles String repository diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy new file mode 100644 index 0000000000..ec768d797f --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy @@ -0,0 +1,32 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic +import groovy.transform.InheritConstructors + +/** + * Model a workflow output object + * + * @author Paolo Di Tommaso + */ +@Canonical +@CompileStatic +@InheritConstructors +class WorkflowOutput extends Output { +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy index 23d6ad179b..5aed920bdd 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy @@ -19,6 +19,7 @@ package nextflow.data.cid.model import groovy.transform.Canonical import groovy.transform.CompileStatic +import nextflow.data.cid.serde.CidSerializable /** * Models the results of a workflow execution. @@ -27,8 +28,7 @@ import groovy.transform.CompileStatic */ @Canonical @CompileStatic -class WorkflowResults { - DataType type +class WorkflowResults implements CidSerializable { String run - List outputs + List outputs } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy index 2eb760601c..d0f76871f1 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy @@ -19,6 +19,7 @@ package nextflow.data.cid.model import groovy.transform.Canonical import groovy.transform.CompileStatic +import nextflow.data.cid.serde.CidSerializable /** * Models a Workflow Execution @@ -27,8 +28,7 @@ import groovy.transform.CompileStatic */ @Canonical @CompileStatic -class WorkflowRun { - DataType type +class WorkflowRun implements CidSerializable { Workflow workflow String sessionId String name diff --git a/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy 
b/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy new file mode 100644 index 0000000000..3ef6294be7 --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy @@ -0,0 +1,49 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.data.cid.serde + + +import groovy.transform.CompileStatic +import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.TaskRun +import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowOutput +import nextflow.data.cid.model.WorkflowResults +import nextflow.data.cid.model.WorkflowRun +import nextflow.serde.gson.GsonEncoder +import nextflow.serde.gson.RuntimeTypeAdapterFactory +/** + * Implements a JSON encoder for CID model objects, tagging each one with a "type" field holding its simple class name + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class CidEncoder extends GsonEncoder<CidSerializable> { + + CidEncoder() { + withTypeAdapterFactory( + RuntimeTypeAdapterFactory.of(CidSerializable.class, "type") + .registerSubtype(WorkflowRun, WorkflowRun.simpleName) + .registerSubtype(WorkflowResults, WorkflowResults.simpleName) + .registerSubtype(Workflow, Workflow.simpleName) + .registerSubtype(WorkflowOutput, WorkflowOutput.simpleName) + .registerSubtype(TaskRun, TaskRun.simpleName) + .registerSubtype(TaskOutput, TaskOutput.simpleName) + ) + } + +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/serde/CidSerializable.groovy 
b/modules/nf-cid/src/main/nextflow/data/cid/serde/CidSerializable.groovy new file mode 100644 index 0000000000..a0eee91cad --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/serde/CidSerializable.groovy @@ -0,0 +1,29 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.data.cid.serde + +import groovy.transform.CompileStatic +import nextflow.serde.JsonSerializable +/** + * Marker interface for CID serializable objects + * + * @author Paolo Di Tommaso + */ +@CompileStatic +interface CidSerializable extends JsonSerializable { + +} diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index 1e69e3d5b1..2ef1c69d62 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -17,14 +17,7 @@ package nextflow.data.cid -import groovy.json.JsonOutput -import nextflow.data.config.DataConfig -import nextflow.processor.TaskConfig -import nextflow.processor.TaskProcessor -import nextflow.script.ScriptBinding -import nextflow.script.WorkflowMetadata -import nextflow.util.CacheHelper -import nextflow.util.PathNormalizer +import static nextflow.data.cid.fs.CidPath.* import java.nio.file.Files import java.nio.file.Path @@ -32,12 +25,24 @@ import java.nio.file.attribute.BasicFileAttributes import 
com.google.common.hash.HashCode import nextflow.Session +import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowOutput +import nextflow.data.cid.model.WorkflowResults +import nextflow.data.cid.model.WorkflowRun +import nextflow.data.cid.serde.CidEncoder +import nextflow.data.config.DataConfig +import nextflow.processor.TaskConfig import nextflow.processor.TaskId +import nextflow.processor.TaskProcessor import nextflow.processor.TaskRun +import nextflow.script.ScriptBinding +import nextflow.script.WorkflowMetadata +import nextflow.util.CacheHelper +import nextflow.util.PathNormalizer import spock.lang.Specification - -import static nextflow.data.cid.fs.CidPath.CID_PROT - /** * * @author Paolo Di Tommaso @@ -68,18 +73,14 @@ class CidObserverTest extends Specification { } store.open(DataConfig.create(session)) def observer = new CidObserver(session, store) - def expectedString = '{"type":"WorkflowRun","workflow":{"type": "Workflow",' + - '"mainScriptFile":{"path":"file://' + scriptFile.toString() + - '", "checksum": {"value": "78910", "algorithm": "nextflow", "mode": "standard"}},' + - '"otherScriptFiles": [], "repository": "https://nextflow.io/nf-test/",' + - '"commitId": "123456" },' + - '"sessionId": "' + uniqueId + '",' + - '"name": "test_run", "params": []}' + def mainScript = new DataPath("file://${scriptFile.toString()}", new Checksum("78910", "nextflow", "standard")) + def workflow = new Workflow(mainScript, [],"https://nextflow.io/nf-test/", "123456" ) + def workflowRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", []) when: observer.onFlowCreate(session) observer.onFlowBegin() then: - folder.resolve(".meta/${observer.executionHash}/.data.json").text == JsonOutput.prettyPrint(expectedString) + folder.resolve(".meta/${observer.executionHash}/.data.json").text == new CidEncoder().encode(workflowRun) 
cleanup: folder?.deleteDir() @@ -117,17 +118,13 @@ class CidObserverTest extends Specification { normalizePath( _ as Path) >> {Path p -> p?.toString()} normalizePath( _ as String) >> {String p -> p} } - def expectedString = '{"type":"TaskRun",' + - '"sessionId":"'+uniqueId.toString() + '",' + - '"name":"foo", "codeChecksum": {' + - '"value": "' + sourceHash + '", "algorithm": "nextflow", "mode": "standard"},' + - '"inputs": null,"container": null,"conda": null,' + - '"spack": null,"architecture": null,' + - '"globalVars": {},"binEntries": [],"annotations":null}' + def taskDescription = new nextflow.data.cid.model.TaskRun(uniqueId.toString(), "foo", + new Checksum(sourceHash, "nextflow", "standard"), + null, null, null, null, null, [:], [], null ) when: observer.storeTaskRun(task, normalizer) then: - folder.resolve(".meta/${hash.toString()}/.data.json").text == JsonOutput.prettyPrint(expectedString) + folder.resolve(".meta/${hash.toString()}/.data.json").text == new CidEncoder().encode(taskDescription) cleanup: folder?.deleteDir() @@ -162,24 +159,15 @@ class CidObserverTest extends Specification { } and: def attrs = Files.readAttributes(outFile, BasicFileAttributes) - def expectedString = '{"type":"TaskOutput",' + - '"path":"' + outFile.toString() + '",' + - '"checksum": { "value":"'+ fileHash + '",' + - '"algorithm": "nextflow", "mode": "standard"},' + - '"source":"cid://15cd5b07",' + - '"size":'+attrs.size() + ',' + - '"createdAt":' + attrs.creationTime().toMillis() + ',' + - '"modifiedAt":'+ attrs.lastModifiedTime().toMillis() + ',' + - '"annotations":null}' - + def output = new TaskOutput(outFile.toString(), new Checksum(fileHash, "nextflow", "standard"), + "cid://15cd5b07", attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis() ) and: observer.readAttributes(outFile) >> attrs when: observer.storeTaskOutput(task, outFile) then: - folder.resolve(".meta/${hash}/foo/bar/file.bam/.data.json").text - == 
JsonOutput.prettyPrint(expectedString) + folder.resolve(".meta/${hash}/foo/bar/file.bam/.data.json").text == new CidEncoder().encode(output) cleanup: folder?.deleteDir() @@ -266,8 +254,6 @@ class CidObserverTest extends Specification { Path.of('outDir') | Path.of('outDir/relative') | "relative" Path.of('/path/to/outDir') | Path.of('results/relative') | "results/relative" Path.of('/path/to/outDir') | Path.of('./relative') | "relative" - - } def 'should return exception when relativise workflow output dirs' (){ @@ -287,8 +273,6 @@ class CidObserverTest extends Specification { OUTPUT_DIR | PATH | EXPECTED Path.of('/path/to/outDir') | Path.of('/another/path/') | "relative" Path.of('/path/to/outDir') | Path.of('../relative') | "relative" - - } def 'should save workflow output' (){ @@ -319,6 +303,7 @@ class CidObserverTest extends Specification { } store.open(DataConfig.create(session)) def observer = new CidObserver(session, store) + def encoder = new CidEncoder() when: 'Starting workflow' observer.onFlowCreate(session) @@ -338,16 +323,9 @@ class CidObserverTest extends Specification { then: 'check file 1 output metadata in cid store' def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes) def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() - def expectedString1 = '{"type":"WorkflowOutput",' + - '"path":"' + outFile1.toString() + '",' + - '"checksum": {"value": "'+ fileHash1 + '",' + - '"algorithm": "nextflow", "mode": "standard"},' + - '"source":"cid://123987/file.bam",' + - '"size":'+attrs1.size() + ',' + - '"createdAt":' + attrs1.creationTime().toMillis() + ',' + - '"modifiedAt":'+ attrs1.lastModifiedTime().toMillis() + ',' + - '"annotations":null}' - folder.resolve(".meta/${observer.executionHash}/foo/file.bam/.data.json").text == JsonOutput.prettyPrint(expectedString1) + def output1 = new WorkflowOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), "cid://123987/file.bam", + attrs1.size(), 
attrs1.creationTime().toMillis(), attrs1.lastModifiedTime().toMillis() ) + folder.resolve(".meta/${observer.executionHash}/foo/file.bam/.data.json").text == encoder.encode(output1) when: 'publish without source path' def outFile2 = outputDir.resolve('foo/file2.bam') @@ -357,27 +335,17 @@ class CidObserverTest extends Specification { def fileHash2 = CacheHelper.hasher(outFile2).hash().toString() observer.onFilePublish(outFile2) then: 'Check outFile2 metadata in cid store' - def expectedString2 = '{"type":"WorkflowOutput",' + - '"path":"' + outFile2.toString() + '",' + - '"checksum": { "value": "'+ fileHash2 + '",' + - '"algorithm": "nextflow", "mode": "standard"},' + - '"source":"cid://' + observer.executionHash +'",' + - '"size":'+attrs2.size() + ',' + - '"createdAt":' + attrs2.creationTime().toMillis() + ',' + - '"modifiedAt":'+ attrs2.lastModifiedTime().toMillis() + ',' + - '"annotations":null}' - folder.resolve(".meta/${observer.executionHash}/foo/file2.bam/.data.json").text == JsonOutput.prettyPrint(expectedString2) + def output2 = new WorkflowOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), "cid://${observer.executionHash}" , + attrs2.size(), attrs2.creationTime().toMillis(), attrs2.lastModifiedTime().toMillis() ) + folder.resolve(".meta/${observer.executionHash}/foo/file2.bam/.data.json").text == encoder.encode(output2) when: 'Workflow complete' observer.onFlowComplete() then: 'Check history file is updated and Workflow Result is written in the cid store' - def expectedString3 = '{"type":"WorkflowResults",' + - '"run":"cid://' + observer.executionHash +'",' + - '"outputs": [ "cid://'+ observer.executionHash + '/foo/file.bam",' + - '"cid://'+ observer.executionHash + '/foo/file2.bam" ]}' + def results = new WorkflowResults( "cid://${observer.executionHash}", [ "cid://${observer.executionHash}/foo/file.bam", "cid://${observer.executionHash}/foo/file2.bam"]) def finalCid = 
store.getHistoryLog().getRecord(uniqueId).resultsCid.substring(CID_PROT.size()) finalCid != observer.executionHash - folder.resolve(".meta/${finalCid}/.data.json").text == JsonOutput.prettyPrint(expectedString3) + folder.resolve(".meta/${finalCid}/.data.json").text == encoder.encode(results) cleanup: folder?.deleteDir() diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy index a19f72f900..af260228f1 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy @@ -20,6 +20,9 @@ package nextflow.data.cid import java.nio.file.Files import java.nio.file.Path +import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.serde.CidEncoder import nextflow.data.config.DataConfig import spock.lang.Specification import spock.lang.TempDir @@ -58,7 +61,7 @@ class DefaultCidStoreTest extends Specification { def "save should store value in the correct file location"() { given: def key = "testKey" - def value = "testValue" + def value = new TaskOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", 1234) def cidStore = new DefaultCidStore() cidStore.open(config) @@ -68,19 +71,19 @@ class DefaultCidStoreTest extends Specification { then: def filePath = metaLocation.resolve("$key/.data.json") Files.exists(filePath) - filePath.text == value + filePath.text == new CidEncoder().encode(value) } def "load should retrieve stored value correctly"() { given: def key = "testKey" - def value = "testValue" + def value = new TaskOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", 1234) def cidStore = new DefaultCidStore() cidStore.open(config) cidStore.save(key, value) expect: - cidStore.load(key) == value + cidStore.load(key).toString() == value.toString() } def 
"load should return null if key does not exist"() { diff --git a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy new file mode 100644 index 0000000000..a90b806b58 --- /dev/null +++ b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy @@ -0,0 +1,31 @@ +package nextflow.data.cid.serde + +import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.Output +import nextflow.data.cid.model.TaskOutput +import spock.lang.Specification + +class CidEncoderTest extends Specification{ + + def 'should encode and decode Outputs'(){ + given: + def encoder = new CidEncoder() + and: + def output = new TaskOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", 1234) + + when: + def encoded = encoder.encode(output) + def object = encoder.decode(encoded) + + then: 'the decoded object, not the input, carries the original values' + object instanceof Output + object.path == "/path/to/file" + object.checksum instanceof Checksum + object.checksum.value == "hash_value" + object.checksum.algorithm == "hash_algorithm" + object.checksum.mode == "standard" + object.source == "cid://source" + object.size == 1234 + + } +} diff --git a/modules/nf-commons/src/main/nextflow/serde/Encoder.groovy b/modules/nf-commons/src/main/nextflow/serde/Encoder.groovy new file mode 100644 index 0000000000..f8a0fb48e0 --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/serde/Encoder.groovy @@ -0,0 +1,45 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.serde; + +/** + * An interface for encoding and decoding objects between two types. + * + * @param <T> the type of the original object to be encoded. + * @param <S> the type of the encoded representation. + * + * @author Paolo Di Tommaso + */ +interface Encoder<T, S> { + + /** + * Encodes an object of type {@code T} into its corresponding encoded representation of type {@code S}. + * + * @param object the object to encode + * @return the encoded representation of the object + */ + S encode(T object) + + /** + * Decodes an encoded representation of type {@code S} back into its original form of type {@code T}. + * + * @param encoded the encoded representation to decode + * @return the decoded object + */ + T decode(S encoded); + +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/DataType.groovy b/modules/nf-commons/src/main/nextflow/serde/JsonSerializable.groovy similarity index 75% rename from modules/nf-cid/src/main/nextflow/data/cid/model/DataType.groovy rename to modules/nf-commons/src/main/nextflow/serde/JsonSerializable.groovy index 0a905e95a0..8ec7292156 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/DataType.groovy +++ b/modules/nf-commons/src/main/nextflow/serde/JsonSerializable.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,16 +12,15 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ -package nextflow.data.cid.model +package nextflow.serde /** - * Possible metadata type entries. + * Implements a marker interface for Json serialization objects. 
* * @author Paolo Di Tommaso */ -enum DataType { - TaskRun, Workflow, WorkflowRun, TaskOutput, WorkflowOutput, WorkflowResults +interface JsonSerializable { + } diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy new file mode 100644 index 0000000000..3f088c87e0 --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy @@ -0,0 +1,39 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.serde.gson + +import java.lang.reflect.Type + +import com.google.gson.JsonElement +import com.google.gson.JsonPrimitive +import com.google.gson.JsonSerializationContext +import com.google.gson.JsonSerializer +import groovy.transform.CompileStatic +/** + * Implements a Gson serializer that writes a Groovy GString as a plain JSON string + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class GStringSerializer implements JsonSerializer<GString> { + + @Override + JsonElement serialize(GString src, Type typeOfSrc, JsonSerializationContext context) { + // Convert GString to plain String + return new JsonPrimitive(src.toString()); + } +} diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy new file mode 100644 index 0000000000..1fd34ccc29 --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy @@ -0,0 +1,93 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.serde.gson + +import java.lang.reflect.Type +import java.time.Instant +import java.time.OffsetDateTime + +import com.google.gson.Gson +import com.google.gson.GsonBuilder +import com.google.gson.TypeAdapterFactory +import groovy.transform.CompileStatic +import nextflow.serde.Encoder +import nextflow.util.TypeHelper +import org.codehaus.groovy.runtime.GStringImpl + +/** + * Implement a JSON encoder based on Google Gson; the Gson instance is built on first use, so configure it via the with* methods before encoding or decoding + * + * @author Paolo Di Tommaso + */ +@CompileStatic +abstract class GsonEncoder<T> implements Encoder<T, String> { + + private Type type + + private TypeAdapterFactory factory + + private boolean prettyPrint + + private volatile Gson gson + + protected GsonEncoder() { + this.type = TypeHelper.getGenericType(this, 0) + } + + GsonEncoder<T> withTypeAdapterFactory(TypeAdapterFactory factory) { + this.factory = factory + return this + } + + GsonEncoder<T> withPrettyPrint(boolean value) { + this.prettyPrint = value + return this + } + + private Gson gson0() { + if( gson ) + return gson + synchronized (this) { + if( gson ) + return gson + return gson = create0() + } + } + + private Gson create0() { + final builder = new GsonBuilder() + builder.registerTypeAdapter(Instant.class, new InstantAdapter()) + builder.registerTypeAdapter(OffsetDateTime.class, new OffsetDateTimeAdapter()) + builder.registerTypeAdapter(GStringImpl.class, new GStringSerializer()) + if( factory ) + builder.registerTypeAdapterFactory(factory) + if( prettyPrint ) + builder.setPrettyPrinting() + return builder.create() + } + + @Override + String encode(T object) { + return gson0().toJson(object, type) + } + + @Override + T decode(String json) { + gson0().fromJson(json, type) + } + +} diff --git a/modules/nf-commons/src/main/nextflow/util/GsonInstantAdapter.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy similarity index 90% rename from modules/nf-commons/src/main/nextflow/util/GsonInstantAdapter.groovy rename to 
modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy index 5415c465a3..5a09bec52a 100644 --- a/modules/nf-commons/src/main/nextflow/util/GsonInstantAdapter.groovy +++ b/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,10 +12,9 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ -package nextflow.util +package nextflow.serde.gson import java.time.Instant @@ -30,7 +29,7 @@ import groovy.transform.CompileStatic * @author Paolo Di Tommaso */ @CompileStatic -class GsonInstantAdapter extends TypeAdapter { +class InstantAdapter extends TypeAdapter { @Override void write(JsonWriter writer, Instant value) throws IOException { writer.value(value?.toString()) diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy new file mode 100644 index 0000000000..5ebccee529 --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy @@ -0,0 +1,43 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.serde.gson + +import java.time.Instant +import java.time.OffsetDateTime + +import com.google.gson.TypeAdapter +import com.google.gson.stream.JsonReader +import com.google.gson.stream.JsonWriter +import groovy.transform.CompileStatic + +/** + * Implements a Gson adapter for {@link OffsetDateTime} + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class OffsetDateTimeAdapter extends TypeAdapter<OffsetDateTime> { + @Override + void write(JsonWriter writer, OffsetDateTime value) throws IOException { + writer.value(value?.toString()) + } + + @Override + OffsetDateTime read(JsonReader reader) throws IOException { + return OffsetDateTime.parse(reader.nextString()) + } +} diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/RuntimeTypeAdapterFactory.java b/modules/nf-commons/src/main/nextflow/serde/gson/RuntimeTypeAdapterFactory.java new file mode 100644 index 0000000000..f087a35eeb --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/serde/gson/RuntimeTypeAdapterFactory.java @@ -0,0 +1,342 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +package nextflow.serde.gson; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; + +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonPrimitive; +import com.google.gson.TypeAdapter; +import com.google.gson.TypeAdapterFactory; +import com.google.gson.reflect.TypeToken; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; + +/* + * NOTE: this class is copied from Gson extra module which is not included in the default + * library distribution. + * + * See + * https://github.com/google/gson/blob/main/extras/src/main/java/com/google/gson/typeadapters/RuntimeTypeAdapterFactory.java + * + */ + + +/** + * Adapts values whose runtime type may differ from their declaration type. This is necessary when a + * field's type is not the same type that GSON should create when deserializing that field. For + * example, consider these types: + * + *
{@code
+ * abstract class Shape {
+ *   int x;
+ *   int y;
+ * }
+ * class Circle extends Shape {
+ *   int radius;
+ * }
+ * class Rectangle extends Shape {
+ *   int width;
+ *   int height;
+ * }
+ * class Diamond extends Shape {
+ *   int width;
+ *   int height;
+ * }
+ * class Drawing {
+ *   Shape bottomShape;
+ *   Shape topShape;
+ * }
+ * }
+ * + *

Without additional type information, the serialized JSON is ambiguous. Is the bottom shape in + * this drawing a rectangle or a diamond? + * + *

{@code
+ * {
+ *   "bottomShape": {
+ *     "width": 10,
+ *     "height": 5,
+ *     "x": 0,
+ *     "y": 0
+ *   },
+ *   "topShape": {
+ *     "radius": 2,
+ *     "x": 4,
+ *     "y": 1
+ *   }
+ * }
+ * }
+ *
+ * This class addresses this problem by adding type information to the serialized JSON and honoring
+ * that type information when the JSON is deserialized:
+ *
+ * <pre>{@code
+ * {
+ *   "bottomShape": {
+ *     "type": "Diamond",
+ *     "width": 10,
+ *     "height": 5,
+ *     "x": 0,
+ *     "y": 0
+ *   },
+ *   "topShape": {
+ *     "type": "Circle",
+ *     "radius": 2,
+ *     "x": 4,
+ *     "y": 1
+ *   }
+ * }
+ * }</pre>
+ *
+ * Both the type field name ({@code "type"}) and the type labels ({@code "Rectangle"}) are
+ * configurable.
+ *
+ * <h2>Registering Types</h2>
+ *
+ * Create a {@code RuntimeTypeAdapterFactory} by passing the base type and type field name to the
+ * {@link #of} factory method. If you don't supply an explicit type field name, {@code "type"} will
+ * be used.
+ *
+ * <pre>{@code
+ * RuntimeTypeAdapterFactory<Shape> shapeAdapterFactory
+ *     = RuntimeTypeAdapterFactory.of(Shape.class, "type");
+ * }</pre>
+ *
+ * Next register all of your subtypes. Every subtype must be explicitly registered. This protects
+ * your application from injection attacks. If you don't supply an explicit type label, the type's
+ * simple name will be used.
+ *
+ * <pre>{@code
+ * shapeAdapterFactory.registerSubtype(Rectangle.class, "Rectangle");
+ * shapeAdapterFactory.registerSubtype(Circle.class, "Circle");
+ * shapeAdapterFactory.registerSubtype(Diamond.class, "Diamond");
+ * }</pre>
+ *
+ * Finally, register the type adapter factory in your application's GSON builder:
+ *
+ * <pre>{@code
+ * Gson gson = new GsonBuilder()
+ *     .registerTypeAdapterFactory(shapeAdapterFactory)
+ *     .create();
+ * }</pre>
+ *
+ * Like {@code GsonBuilder}, this API supports chaining:
+ *
+ * <pre>{@code
+ * RuntimeTypeAdapterFactory<Shape> shapeAdapterFactory = RuntimeTypeAdapterFactory.of(Shape.class)
+ *     .registerSubtype(Rectangle.class)
+ *     .registerSubtype(Circle.class)
+ *     .registerSubtype(Diamond.class);
+ * }</pre>
+ *
+ * <h2>Serialization and deserialization</h2>
+ *
+ * In order to serialize and deserialize a polymorphic object, you must specify the base type
+ * explicitly.
+ *
+ * <pre>{@code
+ * Diamond diamond = new Diamond();
+ * String json = gson.toJson(diamond, Shape.class);
+ * }</pre>
+ *
+ * And then:
+ *
+ * <pre>{@code
+ * Shape shape = gson.fromJson(json, Shape.class);
+ * }</pre>
+ */
+public final class RuntimeTypeAdapterFactory<T> implements TypeAdapterFactory {
+  private final Class<?> baseType;
+  private final String typeFieldName;
+  private final Map<String, Class<?>> labelToSubtype = new LinkedHashMap<>();
+  private final Map<Class<?>, String> subtypeToLabel = new LinkedHashMap<>();
+  private final boolean maintainType;
+  private boolean recognizeSubtypes;
+
+  private RuntimeTypeAdapterFactory(Class<?> baseType, String typeFieldName, boolean maintainType) {
+    if (typeFieldName == null || baseType == null) {
+      throw new NullPointerException();
+    }
+    this.baseType = baseType;
+    this.typeFieldName = typeFieldName;
+    this.maintainType = maintainType;
+  }
+
+  /**
+   * Creates a new runtime type adapter for {@code baseType} using {@code typeFieldName} as the type
+   * field name. Type field names are case sensitive.
+   *
+   * @param maintainType true if the type field should be included in deserialized objects
+   */
+  public static <T> RuntimeTypeAdapterFactory<T> of(
+      Class<T> baseType, String typeFieldName, boolean maintainType) {
+    return new RuntimeTypeAdapterFactory<>(baseType, typeFieldName, maintainType);
+  }
+
+  /**
+   * Creates a new runtime type adapter for {@code baseType} using {@code typeFieldName} as the type
+   * field name. Type field names are case sensitive.
+   */
+  public static <T> RuntimeTypeAdapterFactory<T> of(Class<T> baseType, String typeFieldName) {
+    return new RuntimeTypeAdapterFactory<>(baseType, typeFieldName, false);
+  }
+
+  /**
+   * Creates a new runtime type adapter for {@code baseType} using {@code "type"} as the type field
+   * name.
+   */
+  public static <T> RuntimeTypeAdapterFactory<T> of(Class<T> baseType) {
+    return new RuntimeTypeAdapterFactory<>(baseType, "type", false);
+  }
+
+  /**
+   * Ensures that this factory will handle not just the given {@code baseType}, but any subtype of
+   * that type.
+   */
+  @CanIgnoreReturnValue
+  public RuntimeTypeAdapterFactory<T> recognizeSubtypes() {
+    this.recognizeSubtypes = true;
+    return this;
+  }
+
+  /**
+   * Registers {@code type} identified by {@code label}. Labels are case sensitive.
+   *
+   * @throws IllegalArgumentException if either {@code type} or {@code label} have already been
+   *     registered on this type adapter.
+   */
+  @CanIgnoreReturnValue
+  public RuntimeTypeAdapterFactory<T> registerSubtype(Class<? extends T> type, String label) {
+    if (type == null || label == null) {
+      throw new NullPointerException();
+    }
+    if (subtypeToLabel.containsKey(type) || labelToSubtype.containsKey(label)) {
+      throw new IllegalArgumentException("types and labels must be unique");
+    }
+    labelToSubtype.put(label, type);
+    subtypeToLabel.put(type, label);
+    return this;
+  }
+
+  /**
+   * Registers {@code type} identified by its {@link Class#getSimpleName simple name}. Labels are
+   * case sensitive.
+   *
+   * @throws IllegalArgumentException if either {@code type} or its simple name have already been
+   *     registered on this type adapter.
+   */
+  @CanIgnoreReturnValue
+  public RuntimeTypeAdapterFactory<T> registerSubtype(Class<? extends T> type) {
+    return registerSubtype(type, type.getSimpleName());
+  }
+
+  @Override
+  public <R> TypeAdapter<R> create(Gson gson, TypeToken<R> type) {
+    if (type == null) {
+      return null;
+    }
+    Class<?> rawType = type.getRawType();
+    boolean handle =
+        recognizeSubtypes ? baseType.isAssignableFrom(rawType) : baseType.equals(rawType);
+    if (!handle) {
+      return null; // neither the base type nor (when enabled) one of its subtypes: decline it
+    }
+
+    TypeAdapter<JsonElement> jsonElementAdapter = gson.getAdapter(JsonElement.class);
+    Map<String, TypeAdapter<?>> labelToDelegate = new LinkedHashMap<>();
+    Map<Class<?>, TypeAdapter<?>> subtypeToDelegate = new LinkedHashMap<>();
+    for (Map.Entry<String, Class<?>> entry : labelToSubtype.entrySet()) {
+      TypeAdapter<?> delegate = gson.getDelegateAdapter(this, TypeToken.get(entry.getValue()));
+      labelToDelegate.put(entry.getKey(), delegate);
+      subtypeToDelegate.put(entry.getValue(), delegate);
+    }
+
+    return new TypeAdapter<R>() {
+      @Override
+      public R read(JsonReader in) throws IOException {
+        JsonElement jsonElement = jsonElementAdapter.read(in);
+        JsonElement labelJsonElement;
+        if (maintainType) {
+          labelJsonElement = jsonElement.getAsJsonObject().get(typeFieldName); // keep the field
+        } else {
+          labelJsonElement = jsonElement.getAsJsonObject().remove(typeFieldName); // strip it
+        }
+
+        if (labelJsonElement == null) {
+          throw new JsonParseException(
+              "cannot deserialize "
+                  + baseType
+                  + " because it does not define a field named "
+                  + typeFieldName);
+        }
+        String label = labelJsonElement.getAsString();
+        @SuppressWarnings("unchecked") // registration requires that subtype extends T
+        TypeAdapter<R> delegate = (TypeAdapter<R>) labelToDelegate.get(label);
+        if (delegate == null) {
+          throw new JsonParseException(
+              "cannot deserialize "
+                  + baseType
+                  + " subtype named "
+                  + label
+                  + "; did you forget to register a subtype?");
+        }
+        return delegate.fromJsonTree(jsonElement);
+      }
+
+      @Override
+      public void write(JsonWriter out, R value) throws IOException {
+        Class<?> srcType = value.getClass();
+        String label = subtypeToLabel.get(srcType);
+        @SuppressWarnings("unchecked") // registration requires that subtype extends T
+        TypeAdapter<R> delegate = (TypeAdapter<R>) subtypeToDelegate.get(srcType);
+        if (delegate == null) {
+          throw new JsonParseException(
+              "cannot serialize " + srcType.getName() + "; did you forget to register a subtype?");
+        }
+        JsonObject jsonObject = delegate.toJsonTree(value).getAsJsonObject();
+
+        if (maintainType) { // the delegate's own output is written as-is, no label is added here
+          jsonElementAdapter.write(out, jsonObject);
+          return;
+        }
+
+        JsonObject clone = new JsonObject();
+
+        if (jsonObject.has(typeFieldName)) {
+          throw new JsonParseException(
+              "cannot serialize "
+                  + srcType.getName()
+                  + " because it already defines a field named "
+                  + typeFieldName);
+        }
+        clone.add(typeFieldName, new JsonPrimitive(label)); // label is added before the fields
+
+        for (Map.Entry<String, JsonElement> e : jsonObject.entrySet()) {
+          clone.add(e.getKey(), e.getValue());
+        }
+        jsonElementAdapter.write(out, clone);
+      }
+    }.nullSafe();
+  }
+}
diff --git a/modules/nf-commons/src/main/nextflow/util/GsonHelper.groovy b/modules/nf-commons/src/main/nextflow/util/GsonHelper.groovy index 3ba3bd04aa..1551f97818 100644 --- a/modules/nf-commons/src/main/nextflow/util/GsonHelper.groovy +++ b/modules/nf-commons/src/main/nextflow/util/GsonHelper.groovy @@ -23,19 +23,23 @@ import com.google.gson.Gson import com.google.gson.GsonBuilder import groovy.transform.CompileStatic import groovy.transform.Memoized +import nextflow.serde.gson.InstantAdapter /** * Implements helper for Gson ser-deserialization - * + * + * Deprecated. Use {@link nextflow.serde.gson.GsonEncoder} instead + * * @author Paolo Di Tommaso */ +@Deprecated @CompileStatic class GsonHelper { @Memoized static protected Gson gson() { new GsonBuilder() - .registerTypeAdapter(Instant, new GsonInstantAdapter()) + .registerTypeAdapter(Instant, new InstantAdapter()) .create() } diff --git a/modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy b/modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy new file mode 100644 index 0000000000..f9a42c1896 --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy @@ -0,0 +1,58 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.util
+
+import java.lang.reflect.ParameterizedType
+import java.lang.reflect.Type
+
+import groovy.transform.CompileStatic
+
+/**
+ * Helper methods for inspecting generic type information at runtime.
+ * <p>
+ * It reads the type arguments declared on the generic superclass of an object's class.
+ * </p>
+ *
+ * @author Paolo Di Tommaso
+ */
+@CompileStatic
+class TypeHelper {
+
+    /**
+     * Returns the type argument found at the given position of the object's generic superclass.
+     *
+     * <p>The generic superclass of the object's class is cast to {@link ParameterizedType},
+     * so the class is expected to directly extend a parameterized type.</p>
+     *
+     * @param object the object whose superclass type arguments are inspected
+     * @param index the zero-based position of the type argument to return
+     * @return the {@link Type} declared at that position
+     *
+     * @example
+     * <pre>
+     * class ExampleClass extends GenericBase&lt;String, Integer&gt; {}
+     *
+     * Type type = TypeHelper.getGenericType(new ExampleClass(), 1);
+     * System.out.println(type); // Output: class java.lang.Integer
+     * </pre>
+     */
+    static Type getGenericType(Object object, int index) {
+        final Type[] typeArgs = ((ParameterizedType) object.getClass().getGenericSuperclass()).getActualTypeArguments()
+        return typeArgs[index]
+    }
+
+}
diff --git a/modules/nf-commons/src/test/nextflow/serde/GsonEncoderTest.groovy b/modules/nf-commons/src/test/nextflow/serde/GsonEncoderTest.groovy new file mode 100644 index 0000000000..87327c082b --- /dev/null +++ b/modules/nf-commons/src/test/nextflow/serde/GsonEncoderTest.groovy @@ -0,0 +1,81 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
+
+package nextflow.serde
+
+import java.time.Instant
+import java.time.OffsetDateTime
+import java.time.ZoneOffset
+
+import groovy.transform.EqualsAndHashCode
+import nextflow.serde.gson.GsonEncoder
+import spock.lang.Specification
+/**
+ * Round-trip (encode then decode) tests for a plain object and for polymorphic subtypes.
+ * @author Paolo Di Tommaso
+ */
+class GsonEncoderTest extends Specification {
+
+    @EqualsAndHashCode
+    static class Foo {
+        String name
+        Instant timestamp
+        OffsetDateTime datetime
+    }
+
+    def 'should serialize-deserialize an object' () {
+        given:
+        def encoder = new GsonEncoder<Foo>() { }
+        def ts = Instant.ofEpochSecond(1742638384)
+        def dt = ts.atOffset(ZoneOffset.UTC)
+        def foo = new Foo(name:'Yo!', timestamp: ts, datetime: dt)
+        when:
+        def json = encoder.encode(foo)
+        then:
+        json == '{"name":"Yo!","timestamp":"2025-03-22T10:13:04Z","datetime":"2025-03-22T10:13:04Z"}'
+        encoder.decode(json) == foo
+    }
+
+    def 'should encode and decode polymorphic class/1'() {
+        given:
+        def encoder = new MyEncoder()
+        def dog = new Dog("bau", 10)
+        when:
+        def json = encoder.encode(dog)
+        then:
+        json == '{"@type":"Dog","name":"bau","barkVolume":10}'
+
+        when:
+        def animal = encoder.decode(json)
+        then:
+        animal == dog
+    }
+
+    def 'should encode and decode polymorphic class/2'() {
+        given:
+        def encoder = new MyEncoder()
+        def cat = new Cat("bau", true)
+        when:
+        def json = encoder.encode(cat)
+        then:
+        json == '{"@type":"Cat","name":"bau","likesSun":true}'
+
+        when:
+        def animal = encoder.decode(json)
+        then:
+        animal == cat
+    }
+}
diff --git a/modules/nf-commons/src/test/nextflow/serde/MyEncoder.groovy b/modules/nf-commons/src/test/nextflow/serde/MyEncoder.groovy new file mode 100644 index 0000000000..d223c1bee3 --- /dev/null +++ b/modules/nf-commons/src/test/nextflow/serde/MyEncoder.groovy @@ -0,0 +1,69 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.serde
+
+
+import groovy.transform.CompileStatic
+import groovy.transform.EqualsAndHashCode
+import nextflow.serde.gson.GsonEncoder
+import nextflow.serde.gson.RuntimeTypeAdapterFactory
+/**
+ * Test encoder that registers {@link Dog} and {@link Cat} as subtypes labelled through an "@type" field.
+ * @author Paolo Di Tommaso
+ */
+@CompileStatic
+class MyEncoder extends GsonEncoder<JsonSerializable> {
+
+    MyEncoder() {
+        withTypeAdapterFactory(
+                RuntimeTypeAdapterFactory.of(JsonSerializable.class, "@type")
+                        .registerSubtype(Dog.class, "Dog")
+                        .registerSubtype(Cat.class, "Cat")
+        )
+    }
+
+}
+
+@EqualsAndHashCode
+class Dog implements JsonSerializable {
+    private final String name;
+    int barkVolume;
+
+    Dog(String name, int barkVolume) {
+        this.name = name;
+        this.barkVolume = barkVolume;
+    }
+
+    String getName() {
+        return name;
+    }
+}
+
+@EqualsAndHashCode
+class Cat implements JsonSerializable {
+    private final String name;
+    boolean likesSun;
+
+    Cat(String name, boolean likesSun) {
+        this.name = name;
+        this.likesSun = likesSun;
+    }
+
+    String getName() {
+        return name;
+    }
+}
diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy index 66239e08af..edfb211ed5 100644 --- a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy +++ b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy @@ -17,7 +17,6 @@ package nextflow.data.cid.h2 - import java.sql.Clob import com.zaxxer.hikari.HikariDataSource @@ -26,6 +25,8 @@ import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.data.cid.CidHistoryLog
import nextflow.data.cid.CidStore +import nextflow.data.cid.serde.CidEncoder +import nextflow.data.cid.serde.CidSerializable import nextflow.data.config.DataConfig import nextflow.data.config.DataStoreOpts import nextflow.util.TestOnly @@ -38,11 +39,13 @@ import nextflow.util.TestOnly class H2CidStore implements CidStore { private HikariDataSource dataSource + private CidEncoder encoder @Override H2CidStore open(DataConfig config) { assert config.store.location.startsWith('jdbc:h2:') log.info "Connecting CID H2 store: '${config.store.location}'" + encoder = new CidEncoder() dataSource = createDataSource(config.store) // create the db tables createDbTables(dataSource) @@ -90,19 +93,20 @@ class H2CidStore implements CidStore { } @Override - void save(String key, Object value) { + void save(String key, CidSerializable object) { + final value = encoder.encode(object) try(final sql=new Sql(dataSource)) { sql.execute(""" INSERT INTO cid_file (path, metadata) VALUES (?, ?) - """, [key, value]) + """, [key, (Object)value]) } } @Override - Object load(String key) { + CidSerializable load(String key) { try(final sql=new Sql(dataSource)) { final result = sql.firstRow("SELECT metadata FROM cid_file WHERE path = ?", List.of(key)) - return result ? toValue(result.metadata) : null + return result ? 
encoder.decode(toValue(result.metadata).toString()) : null } } diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy index 601840e635..db346b7767 100644 --- a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy +++ b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy @@ -17,10 +17,11 @@ package nextflow.data.cid.h2 +import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.TaskOutput import nextflow.data.config.DataConfig import spock.lang.Shared import spock.lang.Specification - /** * * @author Paolo Di Tommaso @@ -41,10 +42,12 @@ class H2CidStoreTest extends Specification { } def 'should store and get a value' () { + given: + def value = new TaskOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", 1234) when: - store.save('/some/key', 'Hello world') + store.save('/some/key', value) then: - store.load('/some/key') == 'Hello world' + store.load('/some/key').toString() == value.toString() } } From 6b3293bf544298fa0996c3eb71a75f1fdf0c64ef Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Wed, 2 Apr 2025 11:10:45 +0200 Subject: [PATCH 30/72] PoC for CID store annotations and workflow outputs structure (#5885) Signed-off-by: jorgee Signed-off-by: Jorge Ejarque Signed-off-by: Ben Sherman Signed-off-by: Paolo Di Tommaso Co-authored-by: Ben Sherman Co-authored-by: Paolo Di Tommaso --- build.gradle | 6 +- .../src/main/groovy/nextflow/Session.groovy | 8 +- .../main/groovy/nextflow/cli/CmdCid.groovy | 30 +++ .../nextflow/extension/PublishOp.groovy | 42 ++-- .../nextflow/processor/PublishDir.groovy | 10 +- .../groovy/nextflow/script/OutputDsl.groovy | 10 +- .../nextflow/trace/TraceObserver.groovy | 19 +- .../groovy/nextflow/cli/CmdCidTest.groovy | 53 +++- .../nextflow/script/OutputDslTest.groovy | 6 +- .../main/nextflow/data/cid/CidObserver.groovy | 150 ++++++----- 
.../main/nextflow/data/cid/CidStore.groovy | 7 + .../main/nextflow/data/cid/CidUtils.groovy | 233 ++++++++++++++++++ .../nextflow/data/cid/DefaultCidStore.groovy | 59 ++++- .../data/cid/cli/CidCommandImpl.groovy | 95 ++++++- .../nextflow/data/cid/fs/CidFileSystem.groovy | 7 +- .../data/cid/fs/CidFileSystemProvider.groovy | 36 ++- .../main/nextflow/data/cid/fs/CidPath.groovy | 128 +++++++--- .../data/cid/fs/CidPathFactory.groovy | 9 +- .../data/cid/fs/CidResultsPath.groovy | 80 ++++++ .../cid/fs/ResultsSeekableByteChannel.groovy | 77 ++++++ .../nextflow/data/cid/model/Output.groovy | 6 +- .../data/cid/model/TaskResults.groovy | 37 +++ .../data/cid/model/WorkflowOutput.groovy | 1 + .../data/cid/model/WorkflowResults.groovy | 5 +- .../nextflow/data/cid/serde/CidEncoder.groovy | 20 +- .../nextflow/data/cid/CidObserverTest.groovy | 15 +- .../nextflow/data/cid/CidUtilsTest.groovy | 165 +++++++++++++ .../data/cid/DefaultCidStoreTest.groovy | 37 +++ .../nextflow/data/cid/fs/CidPathTest.groovy | 93 ++++++- .../data/cid/serde/CidEncoderTest.groovy | 133 +++++++++- .../nextflow/serde/gson/GsonEncoder.groovy | 9 + .../nextflow/extension/PublishOpS3Test.groovy | 2 +- .../nextflow/data/cid/h2/H2CidStore.groovy | 35 +++ .../data/cid/h2/H2CidStoreTest.groovy | 32 +++ settings.gradle | 1 + 35 files changed, 1461 insertions(+), 195 deletions(-) create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/fs/CidResultsPath.groovy create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/model/TaskResults.groovy create mode 100644 modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy diff --git a/build.gradle b/build.gradle index 9315efa9a0..5996479dad 100644 --- a/build.gradle +++ b/build.gradle @@ -237,7 +237,7 @@ task compile { def getRuntimeConfigs() { def names = subprojects - .findAll { prj -> 
prj.name in ['nextflow','nf-cid','nf-commons','nf-httpfs','nf-lang'] } + .findAll { prj -> prj.name in ['nextflow','nf-commons','nf-httpfs','nf-lang', 'nf-cid'] } .collect { it.name } FileCollection result = null @@ -263,7 +263,7 @@ task exportClasspath { def home = System.getProperty('user.home') def all = getRuntimeConfigs() def libs = all.collect { File file -> /*println file.canonicalPath.replace(home, '$HOME');*/ file.canonicalPath; } - ['nextflow','nf-cid','nf-commons','nf-httpfs','nf-lang'].each {libs << file("modules/$it/build/libs/${it}-${version}.jar").canonicalPath } + ['nextflow','nf-commons','nf-httpfs','nf-lang', 'nf-cid'].each {libs << file("modules/$it/build/libs/${it}-${version}.jar").canonicalPath } file('.launch.classpath').text = libs.unique().join(':') } } @@ -276,7 +276,7 @@ ext.nexusEmail = project.findProperty('nexusEmail') // `signing.keyId` property needs to be defined in the `gradle.properties` file ext.enableSignArchives = project.findProperty('signing.keyId') -ext.coreProjects = projects( ':nextflow', ':nf-cid', ':nf-commons', ':nf-httpfs', ':nf-lang' ) +ext.coreProjects = projects( ':nextflow', ':nf-commons', ':nf-httpfs', ':nf-lang', ':nf-cid' ) configure(coreProjects) { group = 'io.nextflow' diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 09152bc301..5c7dca1e7a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -1126,10 +1126,10 @@ class Session implements ISession { } } - void notifyWorkflowPublish(Object value) { + void notifyWorkflowPublish(String name, Object value) { for( final observer : observers ) { try { - observer.onWorkflowPublish(value) + observer.onWorkflowPublish(name, value) } catch( Exception e ) { log.error "Failed to invoke observer on workflow publish: $observer", e @@ -1137,11 +1137,11 @@ class Session implements ISession { } } - void 
notifyFilePublish(Path destination, Path source=null) { + void notifyFilePublish(Path destination, Path source, Map annotations) { def copy = new ArrayList(observers) for( TraceObserver observer : copy ) { try { - observer.onFilePublish(destination, source) + observer.onFilePublish(destination, source, annotations) } catch( Exception e ) { log.error "Failed to invoke observer on file publish: $observer", e diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index 677887c43c..dfa47d4347 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -43,6 +43,7 @@ class CmdCid extends CmdBase implements UsageAware { void log(ConfigMap config) void show(ConfigMap config, List args) void lineage(ConfigMap config, List args) + void diff(ConfigMap config, List args) } interface SubCmd { @@ -62,6 +63,7 @@ class CmdCid extends CmdBase implements UsageAware { commands << new CmdLog() commands << new CmdShow() commands << new CmdLineage() + commands << new CmdDiff() } @Parameter(hidden = true) @@ -228,4 +230,32 @@ class CmdCid extends CmdBase implements UsageAware { } } + + class CmdDiff implements SubCmd { + + @Override + String getName() { 'diff' } + + @Override + String getDescription() { + return 'Show differences between two CID descriptions' + } + + void apply(List args) { + if (args.size() != 2) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + operation.diff(config, args) + } + + @Override + void usage() { + println description + println "Usage: nextflow $NAME $name " + } + + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index cc71dbb0b2..13997c81d5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ 
b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -38,6 +38,8 @@ class PublishOp { private Session session + private String name + private DataflowReadChannel source private Map opts @@ -52,8 +54,9 @@ class PublishOp { private volatile boolean complete - PublishOp(Session session, DataflowReadChannel source, Map opts) { + PublishOp(Session session, String name, DataflowReadChannel source, Map opts) { this.session = session + this.name = name this.source = source this.opts = opts this.path = opts.path as String @@ -89,13 +92,14 @@ class PublishOp { if( targetResolver == null ) return - // emit workflow publish event - session.notifyWorkflowPublish(value) - // create publisher final overrides = targetResolver instanceof Closure ? [saveAs: targetResolver] : [path: targetResolver] + if (opts.annotations instanceof Closure){ + final annotations = opts.annotations as Closure + overrides.annotations = annotations.call(value) as Map + } final publisher = PublishDir.create(opts + overrides) // publish files @@ -106,13 +110,10 @@ class PublishOp { publisher.apply(files, sourceDir) } - // append record to index file - if( indexOpts ) { - final record = indexOpts.mapper != null ? indexOpts.mapper.call(value) : value - final normalized = normalizePaths(record, targetResolver) - log.trace "Normalized record for index file: ${normalized}" - indexRecords << normalized - } + // append record to index + final normalized = normalizePaths(value, targetResolver) + log.trace "Normalized record for index file: ${normalized}" + indexRecords << normalized } /** @@ -151,12 +152,21 @@ class PublishOp { } /** - * Once all values have been published, write the - * index file (if enabled). + * Once all values have been published, publish the index + * and write it to a file (if enabled). 
*/ protected void onComplete(nope) { - if( indexOpts && indexRecords.size() > 0 ) { - log.trace "Saving records to index file: ${indexRecords}" + // publish individual record if source is a value channel + final index = CH.isValue(source) + ? indexRecords.first() + : indexRecords + + // publish workflow output + session.notifyWorkflowPublish(name, index) + + // write index file + if( indexOpts && index ) { + log.trace "Saving records to index file: ${index}" final indexPath = indexOpts.path final ext = indexPath.getExtension() indexPath.parent.mkdirs() @@ -169,7 +179,7 @@ class PublishOp { else { log.warn "Invalid extension '${ext}' for index file '${indexPath}' -- should be 'csv' or 'json'" } - session.notifyFilePublish(indexPath) + session.notifyFilePublish(indexPath, null, opts.tags as Map) } log.trace "Publish operator complete" diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index 6d0335f9be..1f519d2a2f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -109,6 +109,11 @@ class PublishDir { */ private def tags + /** + * Annotations to be associated to the target file + */ + private Map annotations + /** * The content type of the file. Currently only supported by AWS S3. 
* This can be either a MIME type content type string or a Boolean value @@ -211,6 +216,9 @@ class PublishDir { if( params.tags != null ) result.tags = params.tags + if( params.annotations != null ) + result.annotations = params.annotations as Map + if( params.contentType instanceof Boolean ) result.contentType = params.contentType else if( params.contentType ) @@ -581,7 +589,7 @@ class PublishDir { } protected void notifyFilePublish(Path destination, Path source=null) { - session.notifyFilePublish(destination, source) + session.notifyFilePublish(destination, source, annotations) } } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy b/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy index be87ce96f9..72077de433 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy @@ -84,7 +84,7 @@ class OutputDsl { final opts = publishOptions(name, defaults, overrides) if( opts.enabled == null || opts.enabled ) - ops << new PublishOp(session, CH.getReadChannel(mixed), opts).apply() + ops << new PublishOp(session, name, CH.getReadChannel(mixed), opts).apply() } } @@ -171,6 +171,14 @@ class OutputDsl { setOption('tags', value) } + void annotations(Map value) { + setOption('annotations', value) + } + + void annotations(Closure value) { + setOption('annotations', value) + } + private void setOption(String name, Object value) { if( opts.containsKey(name) ) throw new ScriptRuntimeException("Publish option `${name}` cannot be defined more than once for a given target") diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy index 16f844643d..147b270074 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy @@ -123,11 +123,15 @@ trait TraceObserver { 
void onFlowError(TaskHandler handler, TraceRecord trace){} /** - * Method that is invoked when a value is published from a channel. + * Method that is invoked when a workflow output is published. * + * @param name + * The name of the workflow output * @param value + * A list if the published channel was a queue channel, + * otherwise an object if the channel was a value channel */ - void onWorkflowPublish(Object value){} + void onWorkflowPublish(String name, Object value){} /** * Method that is invoke when an output file is published @@ -150,4 +154,15 @@ trait TraceObserver { void onFilePublish(Path destination, Path source){ onFilePublish(destination) } + /** + * Method that is invoked when a output file is annotated + * @param destination + * The destination path at `publishDir` folder. + * @param annotations + * The annotations attached to this file + */ + void onFilePublish(Path destination, Path source, Map annotations){ + onFilePublish(destination, source) + } + } diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy index c8f31eed34..f94f7376e3 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -17,6 +17,8 @@ package nextflow.cli +import nextflow.data.cid.serde.CidEncoder + import java.nio.file.Files import nextflow.SysEnv @@ -28,11 +30,13 @@ import nextflow.data.cid.model.Parameter import nextflow.data.cid.model.TaskOutput import nextflow.data.cid.model.TaskRun import nextflow.data.cid.model.WorkflowOutput -import nextflow.data.cid.serde.CidEncoder import nextflow.plugin.Plugins import org.junit.Rule import spock.lang.Specification import test.OutputCapture + +import java.time.Instant + /** * CLI cid Tests * @@ -132,9 +136,10 @@ class CmdCidTest extends Specification { def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } + def 
time = Instant.ofEpochMilli(123456789).toString() def encoder = new CidEncoder().withPrettyPrint(true) def entry = new WorkflowOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", 1234, 123456789, 123456789, null) + "cid://123987/file.bam", 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer cidFile.text = jsonSer @@ -177,7 +182,7 @@ class CmdCidTest extends Specification { then: stdout.size() == 1 - stdout[0] == "No entry found for cid://12345." + stdout[0] == "No entries found for cid://12345." cleanup: folder?.deleteDir() @@ -203,11 +208,12 @@ class CmdCidTest extends Specification { Files.createDirectories(cidFile4.parent) Files.createDirectories(cidFile5.parent) def encoder = new CidEncoder() + def time = Instant.ofEpochMilli(123456789).toString() def entry = new WorkflowOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", 1234, 123456789, 123456789, null) + "cid://123987/file.bam", 1234, time, time, null) cidFile.text = encoder.encode(entry) entry = new TaskOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987", 1234, 123456789, 123456789, null) + "cid://123987", 1234, time, time, null) cidFile2.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "foo", new Checksum("abcde2345","nextflow","standard"), [new Parameter( "ValueInParam", "sample_id","ggal_gut"), @@ -215,7 +221,7 @@ class CmdCidTest extends Specification { null, null, null, null, [:],[], null) cidFile3.text = encoder.encode(entry) entry = new TaskOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://45678", 1234, 123456789, 123456789, null) + "cid://45678", 1234, time, time, null) cidFile4.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "bar", new Checksum("abfs2556","nextflow","standard"), null,null, null, null, null, [:],[], null) @@ -258,4 +264,39 @@ class CmdCidTest extends 
Specification { } + def 'should show query results'(){ + given: + def folder = Files.createTempDirectory('test').toAbsolutePath() + def configFile = folder.resolve('nextflow.config') + configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() + def cidFile = folder.resolve(".meta/12345/.data.json") + Files.createDirectories(cidFile.parent) + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + def encoder = new CidEncoder().withPrettyPrint(true) + def time = Instant.ofEpochMilli(123456789).toString() + def entry = new WorkflowOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", 1234, time, time, null) + def jsonSer = encoder.encode(entry) + def expectedOutput = jsonSer + cidFile.text = jsonSer + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid:///?type=WorkflowOutput"]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == expectedOutput.readLines().size() + stdout.join('\n') == expectedOutput + + cleanup: + folder?.deleteDir() + } + } diff --git a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy index 16e9751bda..44e5aad0b1 100644 --- a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy @@ -77,9 +77,9 @@ class OutputDslTest extends Specification { "file2","${outputDir}/barbar/file2.txt" """.stripIndent() and: - 1 * session.notifyFilePublish(outputDir.resolve('foo/file1.txt'), file1) - 1 * session.notifyFilePublish(outputDir.resolve('barbar/file2.txt'), file2) - 1 * session.notifyFilePublish(outputDir.resolve('index.csv')) + 1 * session.notifyFilePublish(outputDir.resolve('foo/file1.txt'), file1, null) + 1 * session.notifyFilePublish(outputDir.resolve('barbar/file2.txt'), file2, null) + 1 * session.notifyFilePublish(outputDir.resolve('index.csv'), null, null) cleanup: SysEnv.pop() diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index 9182112e16..8ffcb5960a 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ import static nextflow.data.cid.fs.CidPath.* import java.nio.file.Files import java.nio.file.Path import java.nio.file.attribute.BasicFileAttributes +import java.time.Instant import groovy.transform.CompileStatic import groovy.util.logging.Slf4j @@ -30,6 +31,7 @@ import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.Parameter import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.TaskResults import nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowOutput import nextflow.data.cid.model.WorkflowResults @@ -44,6 +46,7 @@ import nextflow.script.params.DefaultInParam import nextflow.script.params.FileInParam import nextflow.script.params.FileOutParam import nextflow.script.params.InParam +import nextflow.script.params.OutParam import nextflow.trace.TraceObserver import nextflow.trace.TraceRecord import nextflow.util.CacheHelper @@ -80,20 +83,23 @@ class CidObserver implements TraceObserver { @Override void onFlowBegin() { - this.executionHash = storeWorkflowRun() + executionHash = storeWorkflowRun() + final executionUri = asUriString(executionHash) workflowResults = new WorkflowResults( - "$CID_PROT${executionHash}", - new ArrayList()) - this.store.getHistoryLog().updateRunCid(session.uniqueId, "${CID_PROT}${this.executionHash}") + Instant.now().toString(), + executionUri, + new HashMap() + ) + this.store.getHistoryLog().updateRunCid(session.uniqueId, executionUri) } @Override void onFlowComplete(){ if (this.workflowResults){ - final json = encoder.encode(workflowResults) - final wfResultsHash = CacheHelper.hasher(json).hash().toString() - this.store.save(wfResultsHash, workflowResults) - this.store.getHistoryLog().updateResultsCid(session.uniqueId, "${CID_PROT}${wfResultsHash}") + workflowResults.creationTime = System.currentTimeMillis() + final key = CacheHelper.hasher(workflowResults).hash().toString() + this.store.save("${key}", workflowResults) + 
this.store.getHistoryLog().updateResultsCid(session.uniqueId, asUriString(key)) } } @@ -130,9 +136,7 @@ class CidObserver implements TraceObserver { session.runName, getNormalizedParams(session.params, normalizer) ) - - final json = encoder.encode(value) - final executionHash = CacheHelper.hasher(json).hash().toString() + final executionHash = CacheHelper.hasher(value).hash().toString() store.save(executionHash, value) return executionHash } @@ -160,19 +164,36 @@ class CidObserver implements TraceObserver { final pathNormalizer = new PathNormalizer(session.workflowMetadata) // store the task run entry storeTaskRun(task, pathNormalizer) - // store all task outputs files - final outputs = task.getOutputsByType(FileOutParam) - outputs.forEach { FileOutParam key, Object value -> manageFileOutParams(value, task)} + // store all task results + storeTaskResults(task) + } + protected String storeTaskResults(TaskRun task ){ + final outputs = task.getOutputs() + final outputParams = new LinkedList() + outputs.forEach { OutParam key, Object value -> + if (key instanceof FileOutParam) { + outputParams.add(new Parameter(key.class.simpleName, key.name, manageFileOutParams(value, task))) + } else { + outputParams.add(new Parameter(key.class.simpleName, key.name, value) ) + } + } + final value = new TaskResults(asUriString(task.hash.toString()), asUriString(executionHash), Instant.now().toString(), outputParams) + final key = CacheHelper.hasher(value).hash().toString() + store.save(key,value) + return key } - private void manageFileOutParams( Object value, TaskRun task) { + private Object manageFileOutParams( Object value, TaskRun task) { if (value instanceof Path) { - storeTaskOutput(task, (Path) value) - } else if (value instanceof Collection) { + return asUriString(storeTaskOutput(task, (Path) value)) + } + if (value instanceof Collection) { + final files = new LinkedList() for (Path it : value) { - storeTaskOutput(task, (Path) it) + files.add( asUriString(storeTaskOutput(task, 
(Path)it)) ) } + return files } } @@ -202,7 +223,7 @@ class CidObserver implements TraceObserver { return key } - protected void storeTaskOutput(TaskRun task, Path path) { + protected String storeTaskOutput(TaskRun task, Path path) { try { final attrs = readAttributes(path) final rel = getTaskRelative(task, path) @@ -213,11 +234,12 @@ class CidObserver implements TraceObserver { final value = new TaskOutput( path.toUriString(), checksum, - "$CID_PROT$task.hash", + asUriString(task.hash.toString()), attrs.size(), - attrs.creationTime().toMillis(), - attrs.lastModifiedTime().toMillis()) + CidUtils.toDate(attrs?.creationTime()), + CidUtils.toDate(attrs?.lastModifiedTime())) store.save(key, value) + return key } catch (Throwable e) { log.warn("Exception storing CID output $path for task ${task.name}. ${e.getLocalizedMessage()}") } @@ -249,7 +271,7 @@ class CidObserver implements TraceObserver { if( storeDir && path.startsWith(storeDir)) { final rel = storeDir.relativize(path) //If output stored in storeDir, keep the path in case it is used as workflow output - this.outputsStoreDirCid.put(path.toString(), "$CID_PROT${task.hash}/$rel".toString()) + this.outputsStoreDirCid.put(path.toString(), asUriString(task.hash.toString(),rel.toString())) return rel } } @@ -259,7 +281,11 @@ class CidObserver implements TraceObserver { } @Override - void onFilePublish(Path destination, Path source){ + void onFilePublish(Path destination, Path source) { + storePublishedFile(destination, source) + } + + protected void storePublishedFile(Path destination, Path source = null, Map annotations = null){ try { final checksum = new Checksum( CacheHelper.hasher(destination).hash().toString(), @@ -267,21 +293,22 @@ class CidObserver implements TraceObserver { CacheHelper.HashMode.DEFAULT().toString().toLowerCase() ) final rel = getWorkflowRelative(destination) - final key = "$executionHash/${rel}" as String - final sourceReference = getSourceReference(source) + final key = "$executionHash/${rel}" 
+ + final sourceReference = source ? getSourceReference(source) : asUriString(executionHash) final attrs = readAttributes(destination) final value = new WorkflowOutput( destination.toUriString(), checksum, sourceReference, attrs.size(), - attrs.creationTime().toMillis(), - attrs.lastModifiedTime().toMillis()) + CidUtils.toDate(attrs?.creationTime()), + CidUtils.toDate(attrs?.lastModifiedTime()), + annotations) + value.publishedBy = asUriString(executionHash) store.save(key, value) - workflowResults.outputs.add("${CID_PROT}${key}".toString()) - } - catch (Throwable e) { - log.warn("Exception storing CID output $destination for workflow ${executionHash}.", e) + } catch (Throwable e) { + log.warn("Exception storing published file $destination for workflow ${executionHash}.", e) } } @@ -289,38 +316,43 @@ class CidObserver implements TraceObserver { final hash = FileHelper.getTaskHashFromPath(source, session.workDir) if (hash) { final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() - return "$CID_PROT$hash/$target" - } else { - final storeDirReference = outputsStoreDirCid.get(source.toString()) - if (storeDirReference) - return "$CID_PROT$storeDirReference" + return asUriString(hash.toString(), target) } - return null + final storeDirReference = outputsStoreDirCid.get(source.toString()) + return storeDirReference ? 
asUriString(storeDirReference) : null } @Override void onFilePublish(Path destination){ - try { - final checksum = new Checksum( - CacheHelper.hasher(destination).hash().toString(), - "nextflow", - CacheHelper.HashMode.DEFAULT().toString().toLowerCase() - ) - final rel = getWorkflowRelative(destination) - final key = "$executionHash/${rel}" - final attrs = readAttributes(destination) - final value = new WorkflowOutput( - destination.toUriString(), - checksum, - "${CID_PROT}${executionHash}".toString(), - attrs.size(), - attrs.creationTime().toMillis(), - attrs.lastModifiedTime().toMillis()) - store.save(key, value) - workflowResults.outputs.add("${CID_PROT}${key}" as String) - }catch (Throwable e) { - log.warn("Exception storing CID output $destination for workflow ${executionHash}. ${e.getLocalizedMessage()}") + storePublishedFile (destination) + } + + @Override + void onWorkflowPublish(String name, Object value){ + workflowResults.outputs.put(name,convertPathsToCidReferences(value)) + } + + private Object convertPathsToCidReferences(Object value){ + if( value instanceof Path ) { + final rel = getWorkflowRelative(value) + return rel ? 
asUriString(executionHash, rel) : value + } + + if( value instanceof Collection ) { + return value.collect { el -> convertPathsToCidReferences(el) } } + + if( value instanceof Map ) { + return value + .findAll { k, v -> v != null } + .collectEntries { k, v -> Map.entry(k, convertPathsToCidReferences(v)) } + } + return value + } + + @Override + void onFilePublish(Path destination, Path source, Map annotations){ + storePublishedFile( destination, source, annotations) } protected String getWorkflowRelative(Path path){ diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy index 058cad7d47..613f639662 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy @@ -53,4 +53,11 @@ interface CidStore extends Closeable { */ CidHistoryLog getHistoryLog() + /** + * Search for cid entries. + * @param queryString Json-path like query string. Only simple and nested field operators are supported (no arrays, wildcards, etc.). + * @return List of Cid objects fulfilling the queryString + */ + List search(String queryString) + } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy new file mode 100644 index 0000000000..96f37858a9 --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy @@ -0,0 +1,233 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.data.cid.fs.CidPath +import nextflow.data.cid.serde.CidSerializable + +import java.nio.file.Path +import java.nio.file.attribute.FileTime +import java.time.Instant + +/** + * Utils class for CID. + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +class CidUtils { + /** + * Query a CID store. + * @param store CID store to query. + * @param uri Query to perform in a URI-like format. + * Format 'cid://[?QueryString][#fragment]' where: + * - Key: Element where the query will be applied. '/' indicates query will be applied in all the elements of the CID store. + * - QueryString: all param-value pairs that the CID element should fulfill in a URI's query string format. + * - Fragment: Element fragment to retrieve. + * @return List of object fulfilling the query + */ + static List query(CidStore store, URI uri) { + String key = uri.authority ? uri.authority + uri.path : uri.path + try { + if (key == CidPath.SEPARATOR) { + final results = store.search(uri.query) + if (results && uri.fragment){ + // If fragment is defined get the property of the object indicated by the fragment + final filteredResults = [] + results.forEach { + final output = navigate(it, uri.fragment) + if (output){ + filteredResults.add(output) + } + } + return filteredResults + } + return results + } else { + final parameters = uri.query ? parseQuery(uri.query) : null + final children = parseChildrenFormFragment(uri.fragment) + return searchPath(store, key, parameters, children ) + } + } catch(Throwable e){ + log.debug("Exception querying $uri. $e.message") + return [] + } + + } + + /** + * Get the array of the search path children elements from the fragment string + * @param fragment String containing the elements separated by '.' 
+ * @return array with the parsed element + */ + static String[] parseChildrenFormFragment(String fragment) { + if (fragment) { + if (fragment.contains('.')) { + return fragment.split("\\.") + } else { + return [fragment] as String[] + } + } else { + return [] as String[] + } + } + /** + * Search for objects inside a description + * @param store CID store + * @param key CID key where to perform the search + * @param params Parameter-value pairs to be evaluated in the key + * @param children Sub-objects to evaluate and retrieve + * @return List of object + */ + protected static List searchPath(CidStore store, String key, Map params, String[] children = []) { + final results = new LinkedList() + final object = store.load(key) + if (object) { + if (children && children.size() > 0) { + final output = navigate(object, children.join('.')) + if (output) { + treatObject(output, params, results) + } else { + throw new FileNotFoundException("Cid object $key/${children ? children.join('/') : ''} not found.") + } + } else { + treatObject(object, params, results) + } + } else { + // If there isn't metadata check the parent to check if it is a subfolder of a task/workflow output + final currentPath = Path.of(key) + final parent = currentPath.getParent() + if (parent) { + ArrayList newChildren = new ArrayList() + newChildren.add(currentPath.getFileName().toString()) + newChildren.addAll(children) + return searchPath(store, parent.toString(), params, newChildren as String[]) + } else { + throw new FileNotFoundException("Cid object $key/${children ? children.join('/') : ''} not found.") + } + } + return results + } + + /** + * Evaluates object or the objects in a collection matches a set of parameter-value pairs. It includes in the results collection in case of match. 
+ * @param object Object or collection of objects to evaluate + * @param params parameter-value pairs to evaluate in each object + * @param results results collection to include the matching objects + */ + protected static void treatObject(def object, Map params, List results) { + if (params) { + if (object instanceof Collection) { + (object as Collection).forEach { treatObject(it, params, results) } + } else if (checkParams(object, params)) { + results.add(object) + } + } else { + results.add(object) + } + } + /** + * Parses a query string and stores its entries in a parameter-value Map. + * @param queryString URI-like query string. (e.g. param1=value1&param2=value2). + * @return Map containing the parameter-value pairs of the query string. + */ + static Map parseQuery(String queryString) { + if (queryString) { + return queryString.split('&').collectEntries { + it.split('=').collect { URLDecoder.decode(it, 'UTF-8') } + } as Map + } + return [:] + } + + /** + * Checks if an object fulfills the parameter-value pairs + * @param object Object to evaluate + * @param params parameter-value pairs to evaluate + * @return true if all object parameters exist and match the value, otherwise false. + */ + static boolean checkParams(Object object, Map params) { + for (final entry : params.entrySet()) { + final value = navigate(object, entry.key) + if (!value || value.toString() != entry.value.toString() ) { + return false + } + } + return true + } + + /** + * Retrieves the sub-object or value indicated by a path. + * @param obj Object to navigate + * @param path Elements path separated by '.' e.g.
field.subfield + * @return sub-object / value + */ + static Object navigate(Object obj, String path){ + if (!obj) + return null + try{ + // type has been replaced by class when evaluating CidSerializable objects + if (obj instanceof CidSerializable && path == 'type') + return obj.getClass()?.simpleName + path.tokenize('.').inject(obj) { current, key -> + if (current == null) return null + + if (current instanceof Map) { + return current[key] // Navigate Map properties + } + + if (current.metaClass.hasProperty(current, key)) { + return current.getAt(key) // Navigate Object properties + } + log.trace("No property found for $key") + return null // Property not found + } + } catch (Throwable e) { + log.debug("Error navigating to $path in object", e) + return null + } + } + + /** + * Helper function to convert from FileTime to ISO 8601. + * + * @param time File time to convert + * @return ISO 8601 date string, or null if the time is not available (null) + */ + static String toDate(FileTime time){ + if (time) + return Instant.ofEpochMilli(time.toMillis()).toString() + else + return null + } + + /** + * Helper function to convert from String ISO 8601 to FileTime.
+ * + * @param date ISO 8601 formatted time + * @return Converted FileTime or null if date is not available (null or empty) + */ + static FileTime toFileTime(String date){ + if (!date) + return null + return FileTime.from(Instant.parse(date)) + } +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy index 691b4a8e1c..6540ce5818 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy @@ -17,17 +17,22 @@ package nextflow.data.cid +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j + +import java.nio.file.FileVisitResult +import java.nio.file.FileVisitor import java.nio.file.Files import java.nio.file.Path +import java.nio.file.attribute.BasicFileAttributes -import groovy.transform.CompileStatic -import groovy.util.logging.Slf4j import nextflow.data.cid.serde.CidEncoder import nextflow.data.cid.serde.CidSerializable import nextflow.data.config.DataConfig import nextflow.exception.AbortOperationException import nextflow.file.FileHelper import nextflow.util.TestOnly + /** * Default Implementation for the a CID store.
* @@ -77,7 +82,7 @@ class DefaultCidStore implements CidStore { final path = metaLocation.resolve("$key/$METADATA_FILE") log.debug("Loading from path $path") if (path.exists()) - return encoder.decode(path.text) + return encoder.decode(path.text) as CidSerializable log.debug("File for key $key not found") return null } @@ -97,5 +102,51 @@ class DefaultCidStore implements CidStore { } @Override - void close() { } + void close() throws IOException { } + + @Override + List search(String queryString) { + + def params = null + if (queryString) { + params = CidUtils.parseQuery(queryString) + } + return searchAllFiles(params) + + } + + private List searchAllFiles (Map params) { + final results = new LinkedList() + + Files.walkFileTree(metaLocation, new FileVisitor() { + + @Override + FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { + FileVisitResult.CONTINUE + } + + @Override + FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + if (file.name.startsWith('.data.json') ) { + final cidObject = encoder.decode(file.text) + if (CidUtils.checkParams(cidObject, params)){ + results.add(cidObject as CidSerializable) + } + } + FileVisitResult.CONTINUE + } + + @Override + FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + FileVisitResult.CONTINUE + } + + @Override + FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + FileVisitResult.CONTINUE + } + }) + + return results + } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy index 821b7a76e9..ebe19eebfa 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy @@ -19,6 +19,7 @@ package nextflow.data.cid.cli import static nextflow.data.cid.fs.CidPath.* +import java.nio.charset.StandardCharsets import 
java.nio.file.Path import groovy.transform.Canonical @@ -30,6 +31,7 @@ import nextflow.dag.MermaidHtmlRenderer import nextflow.data.cid.CidHistoryRecord import nextflow.data.cid.CidStore import nextflow.data.cid.CidStoreFactory +import nextflow.data.cid.CidUtils import nextflow.data.cid.model.Output import nextflow.data.cid.model.Parameter import nextflow.data.cid.model.TaskOutput @@ -37,8 +39,14 @@ import nextflow.data.cid.model.TaskRun import nextflow.data.cid.model.WorkflowOutput import nextflow.data.cid.model.WorkflowRun import nextflow.data.cid.serde.CidEncoder +import nextflow.data.cid.serde.CidSerializable import nextflow.script.params.FileInParam +import nextflow.serde.gson.GsonEncoder import nextflow.ui.TableBuilder +import org.eclipse.jgit.diff.DiffAlgorithm +import org.eclipse.jgit.diff.DiffFormatter +import org.eclipse.jgit.diff.RawText +import org.eclipse.jgit.diff.RawTextComparator /** * Implements CID command line operations * @@ -85,18 +93,21 @@ class CidCommandImpl implements CmdCid.CidCommand { @Override void show(ConfigMap config, List args) { - if (!args[0].startsWith(CID_PROT)) + if (!isCidUri(args[0])) throw new Exception("Identifier is not a CID URL") - final key = args[0].substring(CID_PROT.size()) final store = CidStoreFactory.getOrCreate(new Session(config)) - final encoder = new CidEncoder().withPrettyPrint(true) if (store) { try { - final entry = store.load(key) - if( entry ) - println encoder.encode(entry) - else - println "No entry found for ${args[0]}." + def entries = CidUtils.query(store, new URI(args[0])) + if( entries ) { + entries = entries.size() == 1 ? entries[0] : entries + if (entries instanceof CidSerializable) + println new CidEncoder().withPrettyPrint(true).encode(entries as CidSerializable) + else + println new GsonEncoder(){}.withPrettyPrint(true).encode(entries) + } else { + println "No entries found for ${args[0]}." + } } catch (Throwable e) { println "Error loading ${args[0]}. 
${e.message}" } @@ -137,7 +148,7 @@ class CidCommandImpl implements CmdCid.CidCommand { } private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, CidStore store) { - if (!nodeToRender.startsWith(CID_PROT)) + if (!isCidUri(nodeToRender)) throw new Exception("Identifier is not a CID URL") final key = nodeToRender.substring(CID_PROT.size()) final cidObject = store.load(key) @@ -147,7 +158,7 @@ class CidCommandImpl implements CmdCid.CidCommand { lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); final source = (cidObject as Output).source if (source) { - if (source.startsWith(CID_PROT)) { + if (isCidUri(source)) { nodes.add(source) edges.add(new Edge(source, nodeToRender)) } else { @@ -200,7 +211,7 @@ class CidCommandImpl implements CmdCid.CidCommand { } if (value instanceof CharSequence) { final source = value.toString() - if (source.startsWith(CID_PROT)) { + if (isCidUri(source)) { nodes.add(source) edges.add(new Edge(source, nodeToRender)) return @@ -209,7 +220,7 @@ class CidCommandImpl implements CmdCid.CidCommand { if (value instanceof Map) { if (value.path) { final path = value.path.toString() - if (path.startsWith(CID_PROT)) { + if (isCidUri(path)) { nodes.add(path) edges.add(new Edge(path, nodeToRender)) return @@ -225,4 +236,64 @@ class CidCommandImpl implements CmdCid.CidCommand { lines << " ${value.toString()}@{shape: document, label: \"${label}\"}".toString(); edges.add(new Edge(value.toString(), nodeToRender)) } + + @Override + void diff(ConfigMap config, List args) { + if (!isCidUri(args[0]) || !isCidUri(args[1])) + throw new Exception("Identifier is not a CID URL") + + final store = CidStoreFactory.getOrCreate(new Session(config)) + if (store) { + try { + final key1 = args[0].substring(CID_PROT.size()) + final entry1 = store.load(key1) as String + if( !entry1 ){ + println "No entry found for ${args[0]}." 
+ return + } + final key2 = args[1].substring(CID_PROT.size()) + final entry2 = store.load(key2) as String + if( !entry2 ) { + println "No entry found for ${args[1]}." + return + } + generateDiff(entry1, key1, entry2, key2) + } catch (Throwable e) { + println "Error generating diff between ${args[0]}: $e.message" + } + } else { + println "Error CID store not loaded. Check Nextflow configuration." + } + } + + private static void generateDiff(String entry1, String key1, String entry2, String key2) { + // Convert strings to JGit RawText format + final text1 = new RawText(entry1.getBytes(StandardCharsets.UTF_8)) + final text2 = new RawText(entry2.getBytes(StandardCharsets.UTF_8)) + + // Set up the diff algorithm (Git-style diff) + final diffAlgorithm = DiffAlgorithm.getAlgorithm(DiffAlgorithm.SupportedAlgorithm.MYERS) + final diffComparator = RawTextComparator.DEFAULT + + // Compute the differences + final editList = diffAlgorithm.diff(diffComparator, text1, text2) + + final output = new StringBuilder() + // Add header + output.append("diff --git ${key1} ${key2}\n") + output.append("--- ${key1}\n") + output.append("+++ ${key2}\n") + + // Use DiffFormatter to display results in Git-style format + final outputStream = new ByteArrayOutputStream() + final diffFormatter = new DiffFormatter(outputStream) + diffFormatter.setOldPrefix(key1) + diffFormatter.setNewPrefix(key2) + diffFormatter.format(editList, text1, text2) + output.append(outputStream.toString(StandardCharsets.UTF_8)) + + println output.toString() + } + + } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy index 0f3f39aba4..356183e0b9 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy @@ -111,7 +111,12 @@ class CidFileSystem extends FileSystem { @Override Path getPath(String first, String... 
more) { - return new CidPath(this,first,more) + final path = more ? CidPath.SEPARATOR + more.join(CidPath. SEPARATOR) : '' + return getPath(CidPath.asUri(CidPath.CID_PROT + first + path)) + } + + Path getPath(URI uri){ + return new CidPath(this, uri) } @Override diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy index 4a255bc6f8..378b9da42b 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -101,10 +101,7 @@ class CidFileSystemProvider extends FileSystemProvider { @Override CidPath getPath(URI uri) { - // the URI authority holds the base component of the CID path - final base = uri.authority - final path = uri.path - return (CidPath) getFileSystemOrCreate(uri).getPath(base, path) + return (CidPath) ((CidFileSystem) getFileSystemOrCreate(uri)).getPath(uri) } @Override @@ -115,8 +112,11 @@ class CidFileSystemProvider extends FileSystemProvider { @Override InputStream newInputStream(Path path, OpenOption... 
options) throws IOException { final cid = toCidPath(path) - final realPath = cid.getTargetPath() - realPath.fileSystem.provider().newInputStream(realPath, options) + final realPath = cid.getTargetPath(true) + if (realPath instanceof CidResultsPath) + return (realPath as CidResultsPath).newInputStream() + else + return realPath.fileSystem.provider().newInputStream(realPath, options) } @Override @@ -129,8 +129,13 @@ class CidFileSystemProvider extends FileSystemProvider { throw new UnsupportedOperationException("'$opt' not allowed"); } } - final realPath = cid.getTargetPath() - final channel = realPath.fileSystem.provider().newByteChannel(realPath, options, attrs) + final realPath = cid.getTargetPath(true) + SeekableByteChannel channel + if (realPath instanceof CidResultsPath){ + channel = (realPath as CidResultsPath).newSeekableByteChannel() + } else { + channel = realPath.fileSystem.provider().newByteChannel(realPath, options, attrs) + } new SeekableByteChannel() { @@ -179,7 +184,7 @@ class CidFileSystemProvider extends FileSystemProvider { @Override DirectoryStream newDirectoryStream(Path path, DirectoryStream.Filter filter) throws IOException { final cid = toCidPath(path) - final real = cid.getTargetPath() + final real = cid.getTargetPath(false) final stream = real .getFileSystem() .provider() @@ -277,7 +282,7 @@ class CidFileSystemProvider extends FileSystemProvider { @Override boolean isHidden(Path path) throws IOException { - return toCidPath(path).getTargetPath().isHidden() + return toCidPath(path).getTargetPath(true).isHidden() } @Override @@ -294,7 +299,9 @@ class CidFileSystemProvider extends FileSystemProvider { if( m == AccessMode.EXECUTE ) throw new AccessDeniedException("Execute mode not supported") } - final real = cid.getTargetPath() + final real = cid.getTargetPath(true) + if (real instanceof CidResultsPath) + return real.fileSystem.provider().checkAccess(real, modes) } @@ -306,8 +313,11 @@ class CidFileSystemProvider extends FileSystemProvider { 
@Override A readAttributes(Path path, Class type, LinkOption... options) throws IOException { final cid = toCidPath(path) - final real = cid.getTargetPath() - real.fileSystem.provider().readAttributes(real,type,options) + final real = cid.getTargetPath(true) + if (real instanceof CidResultsPath) + return (real as CidResultsPath).readAttributes(type) + else + return real.fileSystem.provider().readAttributes(real,type,options) } @Override diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy index a12b31152c..c287fe46d4 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,11 +18,18 @@ package nextflow.data.cid.fs import groovy.util.logging.Slf4j +import nextflow.data.cid.CidUtils import nextflow.data.cid.model.Output +import nextflow.data.cid.serde.CidEncoder +import nextflow.data.cid.serde.CidSerializable import nextflow.file.RealPathAware +import nextflow.serde.gson.GsonEncoder import nextflow.util.CacheHelper import nextflow.util.TestOnly +import java.nio.file.attribute.FileTime +import java.time.Instant + import static nextflow.data.cid.fs.CidFileSystemProvider.* import java.nio.file.FileSystem @@ -43,7 +50,6 @@ import nextflow.file.FileHelper */ @Slf4j @CompileStatic - class CidPath implements Path, RealPathAware { static public final List SUPPORTED_CHECKSUM_ALGORITHMS=["nextflow"] @@ -57,18 +63,56 @@ class CidPath implements Path, RealPathAware { // String with the cid file path private String filePath + private String query + + private String fragment + /* * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 */ 
protected CidPath(){} - CidPath(CidFileSystem fs, String path) { - this(fs, path, EMPTY) + CidPath(CidFileSystem fs, URI uri) { + if( uri.scheme != SCHEME ) { + throw new IllegalArgumentException("Invalid CID URI - scheme is different for $SCHEME") + } + this.fileSystem = fs + this.query = uri.query + this.fragment = uri.fragment + this.filePath = resolve0( fs, norm0("${uri.authority?:''}${uri.path}") ) } - CidPath(CidFileSystem fs, String path, String[] more) { + protected CidPath( String query, String fragment, String filepath, CidFileSystem fs){ this.fileSystem = fs - this.filePath = resolve0(fs, norm0(path), norm0(more)) + this.query = query + this.fragment = fragment + this.filePath = filepath + } + + + CidPath(CidFileSystem fs, String path) { + this( fs, asUri( CID_PROT + norm0(path)) ) + } + + CidPath(CidFileSystem fs, String first, String[] more) { + this( fs, asUri( CID_PROT + buildPath(first, more) ) ) + } + + static String asUriString(String first, String... more) { + return CID_PROT + buildPath(first, more) + } + + static boolean isCidUri(String path) { + return path && path.startsWith(CID_PROT) + } + + private static String buildPath(String first, String[] more){ + first = norm0(first) + if (more){ + final morePath = norm0(more).join(SEPARATOR) + return first.isEmpty() ? 
morePath : first + SEPARATOR + morePath + } + return first } private static void validateHash(Output cidObject) { @@ -97,7 +141,7 @@ class CidPath implements Path, RealPathAware { /** * Finds the target path of a CID path **/ - protected static Path findTarget(CidFileSystem fs, String filePath, String[] childs=[]) throws Exception{ + protected static Path findTarget(CidFileSystem fs, String filePath, boolean resultsAsPath, String[] children=[]) throws Exception{ if( !fs ) throw new IllegalArgumentException("Cannot get target path for a relative CidPath") if( filePath.isEmpty() || filePath == SEPARATOR ) @@ -108,30 +152,52 @@ class CidPath implements Path, RealPathAware { final object = store.load(filePath) if ( object ){ if( object instanceof Output ) { - final cidObject = object as Output - // return the real path stored in the metadata - validateHash(cidObject) - def realPath = FileHelper.toCanonicalPath(cidObject.path as String) - if (childs && childs.size() > 0) - realPath = realPath.resolve(childs.join(SEPARATOR)) - if( !realPath.exists() ) - throw new FileNotFoundException("Target path $realPath for $filePath does not exists.") - return realPath + return getTargetPathFromOutput(object, children) + } + + if( resultsAsPath ){ + return getMetadataAsTargetPath(object, fs, filePath, children) } } else { // If there isn't metadata check the parent to check if it is a subfolder of a task/workflow output final currentPath = Path.of(filePath) final parent = Path.of(filePath).getParent() if( parent) { - ArrayList newChilds = new ArrayList() - newChilds.add(currentPath.getFileName().toString()) - newChilds.addAll(childs) - return findTarget(fs, parent.toString(), newChilds as String[]) + ArrayList newChildren = new ArrayList() + newChildren.add(currentPath.getFileName().toString()) + newChildren.addAll(children) + return findTarget(fs, parent.toString(), resultsAsPath, newChildren as String[]) + } + } + throw new FileNotFoundException("Target path $filePath does not 
exists.") + } + + protected static Path getMetadataAsTargetPath(CidSerializable results, CidFileSystem fs, String filePath, String[] children){ + if( results ) { + def creationTime = CidUtils.toFileTime(CidUtils.navigate(results, 'creationTime') as String) ?: FileTime.from(Instant.now()) + if( children && children.size() > 0 ) { + final output = CidUtils.navigate(results, children.join('.')) + if( output ){ + return new CidResultsPath(new GsonEncoder(){}.withPrettyPrint(true).encode(output), creationTime, fs, filePath, children) + } } + return new CidResultsPath(new CidEncoder().withPrettyPrint(true).encode(results), creationTime, fs, filePath, children) } throw new FileNotFoundException("Target path $filePath does not exists.") } + private static Path getTargetPathFromOutput(Output object, String[] children) { + final cidObject = object as Output + // return the real path stored in the metadata + validateHash(cidObject) + def realPath = FileHelper.toCanonicalPath(cidObject.path as String) + if (children && children.size() > 0) + realPath = realPath.resolve(children.join(SEPARATOR)) + if (!realPath.exists()) + throw new FileNotFoundException("Target path $realPath does not exists.") + return realPath + } + private static boolean isEmptyBase(CidFileSystem fs, String base){ return !base || base == SEPARATOR || (fs && base == "..") } @@ -270,7 +336,7 @@ class CidPath implements Path, RealPathAware { return that } else { final newPath = Path.of(filePath).resolve(that.toString()) - return new CidPath(fileSystem, newPath.toString()) + return new CidPath(that.query, that.fragment, newPath.toString(), fileSystem) } } @@ -306,12 +372,12 @@ class CidPath implements Path, RealPathAware { // Compare 'filePath' as relative paths path = Path.of(filePath).relativize(Path.of(cidOther.filePath)) } - return new CidPath(null , path.getNameCount()>0 ? path.toString(): SEPARATOR) + return new CidPath(cidOther.query, cidOther.fragment, path.getNameCount()>0 ? 
path.toString() : SEPARATOR, null) } @Override URI toUri() { - asUri("${SCHEME}://${filePath}") + asUri("${SCHEME}://${filePath}${query ? '?' + query: ''}${fragment ? '#'+ fragment : ''}") } String toUriString() { @@ -325,11 +391,11 @@ class CidPath implements Path, RealPathAware { @Override Path toRealPath(LinkOption... options) throws IOException { - return this.getTargetPath() + return this.getTargetPath(true) } - protected Path getTargetPath(){ - return findTarget(fileSystem, filePath) + protected Path getTargetPath(boolean resultsAsPath=false){ + return findTarget(fileSystem, filePath, resultsAsPath, CidUtils.parseChildrenFormFragment(fragment)) } @Override @@ -344,10 +410,7 @@ class CidPath implements Path, RealPathAware { @Override int compareTo(Path other) { - if( CidPath.class != other.class ) - throw new ProviderMismatchException() - final that = other as CidPath - return Path.of(this.filePath).compareTo(Path.of(that.filePath)) + return toString().compareTo(other.toString()); } @Override @@ -375,13 +438,14 @@ class CidPath implements Path, RealPathAware { if (path.startsWith(CID_PROT + SEPARATOR) && path.length() > 7) throw new IllegalArgumentException("Invalid CID file system path URI - make sure the schema prefix does not container more than two slash characters - offending value: $path") if (path == CID_PROT) //Empty path case - return new URI("") + return new URI("cid:///") return new URI(path) } @Override String toString() { - filePath + return "$filePath${query ? '?' + query: ''}${fragment ? 
'#'+ fragment : ''}".toString() } } + diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy index a7a365a6f7..cd6031f137 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,15 +17,14 @@ package nextflow.data.cid.fs +import static nextflow.data.cid.fs.CidPath.* + import java.nio.file.Path import groovy.transform.CompileStatic import nextflow.data.config.DataConfig import nextflow.file.FileHelper import nextflow.file.FileSystemPathFactory - -import static nextflow.data.cid.fs.CidPath.CID_PROT - /** * Implements a {@link FileSystemPathFactory} for CID file system * @@ -36,7 +35,7 @@ class CidPathFactory extends FileSystemPathFactory { @Override protected Path parseUri(String uri) { - return uri.startsWith(CID_PROT) ? create(uri) : null + return isCidUri(uri) ? create(uri) : null } @Override diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidResultsPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidResultsPath.groovy new file mode 100644 index 0000000000..bb3d791fdc --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidResultsPath.groovy @@ -0,0 +1,80 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.fs + +import groovy.transform.CompileStatic + +import java.nio.channels.SeekableByteChannel +import java.nio.file.attribute.BasicFileAttributes +import java.nio.file.attribute.FileTime + +/** + * Class to model the metadata results description as a file. + * + * @author Jorge Ejarque + */ +@CompileStatic +class CidResultsPath extends CidPath { + private byte[] results + private FileTime creationTime + + CidResultsPath (String resultsObject, FileTime creationTime, CidFileSystem fs, String path, String[] childs) { + super(fs, path, childs) + this.results = resultsObject.getBytes("UTF-8") + this.creationTime = creationTime + } + + InputStream newInputStream() { + return new ByteArrayInputStream(results) + } + + SeekableByteChannel newSeekableByteChannel(){ + return new ResultsSeekableByteChannel(results) + } + + A readAttributes(Class type){ + return (A) new BasicFileAttributes() { + @Override + long size() { return results.length } + + @Override + FileTime lastModifiedTime() { return creationTime } + + @Override + FileTime lastAccessTime() { return creationTime } + + @Override + FileTime creationTime() { return creationTime } + + @Override + boolean isRegularFile() { return true } + + @Override + boolean isDirectory() { return false } + + @Override + boolean isSymbolicLink() { return false } + + @Override + boolean isOther() { return false } + + @Override + Object fileKey() { return null } + } + } +} \ No newline at end of file diff --git 
a/modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy new file mode 100644 index 0000000000..40a44c2569 --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy @@ -0,0 +1,77 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.fs + +import groovy.transform.CompileStatic + +import java.nio.ByteBuffer +import java.nio.channels.ClosedChannelException +import java.nio.channels.NonWritableChannelException +import java.nio.channels.SeekableByteChannel + +/** + * SeekableByteChannel for metadata results description as a file. 
+ * + * @author Jorge Ejarque + */ +@CompileStatic +class ResultsSeekableByteChannel implements SeekableByteChannel { + private final ByteBuffer buffer + private boolean open + + ResultsSeekableByteChannel(byte[] bytes){ + this.open = true + this.buffer = ByteBuffer.wrap(bytes) + } + + @Override + int read(ByteBuffer dst) { + if (!open) throw new ClosedChannelException() + if (!buffer.hasRemaining()) return -1 + int remaining = Math.min(dst.remaining(), buffer.remaining()) + byte[] temp = new byte[remaining] + buffer.get(temp) + dst.put(temp) + return remaining + } + + @Override + int write(ByteBuffer src) { throw new NonWritableChannelException() } + + @Override + long position() { return buffer.position() } + + @Override + SeekableByteChannel position(long newPosition) { + if (newPosition < 0 || newPosition > buffer.limit()) throw new IllegalArgumentException() + buffer.position((int) newPosition) + return this + } + + @Override + long size() { return buffer.limit() } + + @Override + SeekableByteChannel truncate(long size) { throw new NonWritableChannelException() } + + @Override + boolean isOpen() { return open } + + @Override + void close() { open = false } +} \ No newline at end of file diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy index 6b17b78485..c5d0d94a1c 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy @@ -33,7 +33,7 @@ abstract class Output implements CidSerializable { Checksum checksum String source long size - long createdAt - long modifiedAt - List annotations + String createdAt + String modifiedAt + Map annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskResults.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskResults.groovy new file mode 100644 index 0000000000..b51b0e07f1 --- /dev/null +++ 
b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskResults.groovy @@ -0,0 +1,37 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic +import nextflow.data.cid.serde.CidSerializable + +/** + * Models task results. + * + * @author Jorge Ejarque + */ +@Canonical +@CompileStatic +class TaskResults implements CidSerializable { + String taskRun + String runBy + String creationTime + List outputs + List annotations +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy index ec768d797f..8a65951a8d 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy @@ -29,4 +29,5 @@ import groovy.transform.InheritConstructors @CompileStatic @InheritConstructors class WorkflowOutput extends Output { + String publishedBy } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy index 5aed920bdd..0d308967e5 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy @@ -29,6 +29,7 @@ import 
nextflow.data.cid.serde.CidSerializable @Canonical @CompileStatic class WorkflowResults implements CidSerializable { - String run - List outputs + String creationTime + String runId + Map outputs } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy b/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy index 3ef6294be7..aa65f56d61 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy @@ -19,6 +19,7 @@ package nextflow.data.cid.serde import groovy.transform.CompileStatic import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.TaskResults import nextflow.data.cid.model.TaskRun import nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowOutput @@ -35,15 +36,16 @@ import nextflow.serde.gson.RuntimeTypeAdapterFactory class CidEncoder extends GsonEncoder { CidEncoder() { - withTypeAdapterFactory( - RuntimeTypeAdapterFactory.of(CidSerializable.class, "type") - .registerSubtype(WorkflowRun, WorkflowRun.simpleName) - .registerSubtype(WorkflowResults, WorkflowResults.simpleName) - .registerSubtype(Workflow, Workflow.simpleName) - .registerSubtype(WorkflowOutput, WorkflowOutput.simpleName) - .registerSubtype(TaskRun, TaskRun.simpleName) - .registerSubtype(TaskOutput, TaskOutput.simpleName) - ) + withTypeAdapterFactory(RuntimeTypeAdapterFactory.of(CidSerializable.class, "type") + .registerSubtype(WorkflowRun, WorkflowRun.simpleName) + .registerSubtype(WorkflowResults, WorkflowResults.simpleName) + .registerSubtype(Workflow, Workflow.simpleName) + .registerSubtype(WorkflowOutput, WorkflowOutput.simpleName) + .registerSubtype(TaskRun, TaskRun.simpleName) + .registerSubtype(TaskOutput, TaskOutput.simpleName) + .registerSubtype(TaskResults, TaskResults.simpleName) ) + // enable rendering of null values + withSerializeNulls(true) } } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy 
b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index 2ef1c69d62..7003b0439b 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -160,7 +160,7 @@ class CidObserverTest extends Specification { and: def attrs = Files.readAttributes(outFile, BasicFileAttributes) def output = new TaskOutput(outFile.toString(), new Checksum(fileHash, "nextflow", "standard"), - "cid://15cd5b07", attrs.size(), attrs.creationTime().toMillis(), attrs.lastModifiedTime().toMillis() ) + "cid://15cd5b07", attrs.size(), CidUtils.toDate(attrs.creationTime()), CidUtils.toDate(attrs.lastModifiedTime()) ) and: observer.readAttributes(outFile) >> attrs @@ -320,11 +320,14 @@ class CidObserverTest extends Specification { Files.createDirectories(sourceFile1.parent) sourceFile1.text = 'some data1' observer.onFilePublish(outFile1, sourceFile1) + observer.onWorkflowPublish("a", outFile1) + then: 'check file 1 output metadata in cid store' def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes) def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() def output1 = new WorkflowOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), "cid://123987/file.bam", - attrs1.size(), attrs1.creationTime().toMillis(), attrs1.lastModifiedTime().toMillis() ) + attrs1.size(), CidUtils.toDate(attrs1.creationTime()), CidUtils.toDate(attrs1.lastModifiedTime()) ) + output1.setPublishedBy("$CID_PROT${observer.executionHash}".toString()) folder.resolve(".meta/${observer.executionHash}/foo/file.bam/.data.json").text == encoder.encode(output1) when: 'publish without source path' @@ -334,18 +337,20 @@ class CidObserverTest extends Specification { def attrs2 = Files.readAttributes(outFile2, BasicFileAttributes) def fileHash2 = CacheHelper.hasher(outFile2).hash().toString() observer.onFilePublish(outFile2) + observer.onWorkflowPublish("b", outFile2) then: 'Check outFile2 metadata 
in cid store' def output2 = new WorkflowOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), "cid://${observer.executionHash}" , - attrs2.size(), attrs2.creationTime().toMillis(), attrs2.lastModifiedTime().toMillis() ) + attrs2.size(), CidUtils.toDate(attrs2.creationTime()), CidUtils.toDate(attrs2.lastModifiedTime()) ) + output2.setPublishedBy("$CID_PROT${observer.executionHash}".toString()) folder.resolve(".meta/${observer.executionHash}/foo/file2.bam/.data.json").text == encoder.encode(output2) when: 'Workflow complete' observer.onFlowComplete() then: 'Check history file is updated and Workflow Result is written in the cid store' - def results = new WorkflowResults( "cid://${observer.executionHash}", [ "cid://${observer.executionHash}/foo/file.bam", "cid://${observer.executionHash}/foo/file2.bam"]) def finalCid = store.getHistoryLog().getRecord(uniqueId).resultsCid.substring(CID_PROT.size()) finalCid != observer.executionHash - folder.resolve(".meta/${finalCid}/.data.json").text == encoder.encode(results) + def resultsRetrieved = store.load(finalCid) as WorkflowResults + resultsRetrieved.outputs == [a: "cid://${observer.executionHash}/foo/file.bam", b: "cid://${observer.executionHash}/foo/file2.bam"] cleanup: folder?.deleteDir() diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy new file mode 100644 index 0000000000..88565b01cf --- /dev/null +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy @@ -0,0 +1,165 @@ +package nextflow.data.cid + +import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowRun +import nextflow.data.config.DataConfig +import spock.lang.Specification +import spock.lang.TempDir + +import java.nio.file.Path +import java.nio.file.attribute.FileTime +import java.time.Instant + 
+class CidUtilsTest extends Specification{ + + @TempDir + Path tempDir + + Path storeLocation + DataConfig config + + def setup() { + storeLocation = tempDir.resolve("store") + def configMap = [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]] + config = new DataConfig(configMap) + } + + def 'should convert to Date'(){ + expect: + CidUtils.toDate(FILE_TIME) == DATE + where: + FILE_TIME | DATE + null | null + FileTime.fromMillis(1234) | Instant.ofEpochMilli(1234).toString() + } + + def 'should convert to FileTime'(){ + expect: + CidUtils.toFileTime(DATE) == FILE_TIME + where: + FILE_TIME | DATE + null | null + FileTime.fromMillis(1234) | Instant.ofEpochMilli(1234).toString() + } + + + def 'should query'() { + given: + def uniqueId = UUID.randomUUID() + def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) + def workflow = new Workflow(mainScript, [], "https://nextflow.io/nf-test/", "123456") + def key = "testKey" + def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) + + def cidStore = new DefaultCidStore() + cidStore.open(config) + cidStore.save(key, value1) + when: + List params = CidUtils.query(cidStore, new URI('cid://testKey#params')) + then: + params.size() == 1 + params[0] instanceof List + (params[0] as List).size() == 2 + + } + + def "should parse children elements form Fragment string"() { + expect: + CidUtils.parseChildrenFormFragment(FRAGMENT) == EXPECTED + + where: + FRAGMENT | EXPECTED + "field1" | ["field1"] + "field1.field2" | ["field1", "field2"] + null | [] + "" | [] + } + + def "should parse a query string as Map"() { + expect: + CidUtils.parseQuery(QUERY_STRING) == EXPECTED + + where: + QUERY_STRING | EXPECTED + "key1=value1&key2=value2" | ["key1": "value1", "key2": "value2"] + "key=val with space" | ["key": "val with space"] 
+ "" | [:] + null | [:] + } + + def "should check params in an object"() { + given: + def obj = ["field": "value", "nested": ["subfield": "subvalue"]] + + expect: + CidUtils.checkParams(obj, PARAMS) == EXPECTED + + where: + PARAMS | EXPECTED + ["field": "value"] | true + ["field": "wrong"] | false + ["nested.subfield": "subvalue"] | true + ["nested.subfield": "wrong"] | false + } + + def "should navigate in object params"() { + given: + def obj = [ + "key1": "value1", + "nested": [ + "subkey": "subvalue" + ] + ] + + expect: + CidUtils.navigate(obj, PATH) == EXPECTED + + where: + PATH | EXPECTED + "key1" | "value1" + "nested.subkey" | "subvalue" + "wrongKey" | null + } + + def "should add objects matching parameters"() { + given: + def results = [] + + when: + CidUtils.treatObject(OBJECT, PARAMS, results) + + then: + results == EXPECTED + + where: + OBJECT | PARAMS | EXPECTED + ["field": "value"] | ["field": "value"] | [["field": "value"]] + ["field": "wrong"] | ["field": "value"] | [] + [["field": "value"], ["field": "x"]] | ["field": "value"] | [["field": "value"]] + "string" | [:] | ["string"] + ["nested": ["subfield": "match"]] | ["nested.subfield": "match"] | [["nested": ["subfield": "match"]]] + ["nested": ["subfield": "nomatch"]] | ["nested.subfield": "match"] | [] + [["nested": ["subfield": "match"]], ["nested": ["subfield": "other"]]] | ["nested.subfield": "match"] | [["nested": ["subfield": "match"]]] + } + + def "Should search path"() { + given: + def uniqueId = UUID.randomUUID() + def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) + def workflow = new Workflow(mainScript, [], "https://nextflow.io/nf-test/", "123456") + def key = "testKey" + def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) + def cidStore = new DefaultCidStore() + cidStore.open(config) + cidStore.save(key, value1) + when: + 
def result = CidUtils.searchPath(cidStore, key, ["name":"param1"], ["params"] as String[]) + + then: + result == [new Parameter("String", "param1", "value1")] + } + +} diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy index af260228f1..5756dcadad 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy @@ -17,8 +17,15 @@ package nextflow.data.cid +import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowOutput +import nextflow.data.cid.model.WorkflowRun + import java.nio.file.Files import java.nio.file.Path +import java.time.Instant import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.TaskOutput @@ -94,4 +101,34 @@ class DefaultCidStoreTest extends Specification { expect: cidStore.load("nonexistentKey") == null } + + def 'should query' () { + given: + def uniqueId = UUID.randomUUID() + def time = Instant.ofEpochMilli(1234567).toString() + def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) + def workflow = new Workflow(mainScript, [],"https://nextflow.io/nf-test/", "123456" ) + def key = "testKey" + def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [ new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] ) + def key2 = "testKey2" + def value2 = new WorkflowOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key1:"value1", key2:"value2"]) + def key3 = "testKey3" + def value3 = new WorkflowOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key2:"value2", key3:"value3"]) + def key4 = "testKey4" + def value4 = new 
WorkflowOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key3:"value3", key4:"value4"]) + + def cidStore = new DefaultCidStore() + cidStore.open(config) + cidStore.save(key, value1) + cidStore.save(key2, value2) + cidStore.save(key3, value3) + cidStore.save(key4, value4) + + when: + def results3 = cidStore.search("type=WorkflowOutput&annotations.key2=value2") + then: + results3.size() == 2 + } + + } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy index 69663dd995..984247e6f1 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy @@ -17,11 +17,13 @@ package nextflow.data.cid.fs -import nextflow.data.cid.DefaultCidStore -import nextflow.data.config.DataConfig +import nextflow.data.cid.model.WorkflowResults +import nextflow.data.cid.serde.CidEncoder +import nextflow.file.FileHelper +import nextflow.serde.gson.GsonEncoder import java.nio.file.Files -import java.nio.file.Path +import java.time.Instant import spock.lang.Shared import spock.lang.Specification @@ -42,6 +44,24 @@ class CidPathTest extends Specification { wdir.deleteDir() } + def 'should create from URI' () { + when: + def path = new CidPath(fs, new URI( URI_STRING )) + then: + path.filePath == PATH + path.fragment == FRAGMENT + path.query == QUERY + + where: + URI_STRING | PATH | QUERY | FRAGMENT + "cid://1234/hola" | "1234/hola" | null | null + "cid://1234/hola#frag.sub" | "1234/hola" | null | "frag.sub" + "cid://1234/#frag.sub" | "1234" | null | "frag.sub" + "cid://1234/?q=a&b=c" | "1234" | "q=a&b=c" | null + "cid://1234/?q=a&b=c#frag.sub" | "1234" | "q=a&b=c" | "frag.sub" + "cid:///" | "/" | null | null + } + def 'should create correct cid Path' () { when: def cid = new CidPath(FS, PATH, MORE) @@ -83,6 +103,7 @@ class CidPathTest extends Specification { null | 
'./1234/c' | [] as String[] | '1234/c' fs | '1234' | ['/'] as String[] | '1234' null | '1234' | ['/'] as String[] | '1234' + null | '../../a/b' | [] as String[] | '../../a/b' } def 'should get target path' () { @@ -94,14 +115,18 @@ class CidPathTest extends Specification { outputSubFolderFile.text = "this is file1" def outputFile = data.resolve('file2.txt') outputFile.text = "this is file2" - def store = new DefaultCidStore() - store.open(new DataConfig(enabled: true, store: [location: cid.parent.toString()])) - def cidFs = Mock(CidFileSystem){ getCidStore() >> store } + + def cidFs = new FileHelper().getOrCreateFileSystemFor('cid', [enabled: true, store: [location: cid.parent.toString()]] ) + cid.resolve('12345/output1').mkdirs() cid.resolve('12345/path/to/file2.txt').mkdirs() cid.resolve('12345/.data.json').text = '{"type":"TaskRun"}' cid.resolve('12345/output1/.data.json').text = '{"type":"TaskOutput", "path": "' + outputFolder.toString() + '"}' cid.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"TaskOutput", "path": "' + outputFile.toString() + '"}' + def time = Instant.now().toString() + def wfResultsMetadata = new CidEncoder().withPrettyPrint(true).encode(new WorkflowResults(time, "cid://1234", [a: "cid://1234/a.txt"])) + cid.resolve('5678/').mkdirs() + cid.resolve('5678/.data.json').text = wfResultsMetadata expect: 'Get real path when CidPath is the output data or a subfolder' new CidPath(cidFs,'12345/output1' ).getTargetPath() == outputFolder @@ -134,6 +159,23 @@ class CidPathTest extends Specification { then: thrown(FileNotFoundException) + when: 'Cid description' + def result = new CidPath(cidFs, '5678').getTargetPath(true) + then: + result instanceof CidResultsPath + result.text == wfResultsMetadata + + when: 'Cid description subobject' + def result2 = new CidPath(cidFs, '5678#outputs').getTargetPath(true) + then: + result2 instanceof CidResultsPath + result2.text == new GsonEncoder(){}.withPrettyPrint(true).encode([a: "cid://1234/a.txt"]) 
+ + when: 'Cid subobject does not exist' + new CidPath(cidFs, '23456#notexists').getTargetPath(true) + then: + thrown(FileNotFoundException) + cleanup: cid.resolve('12345').deleteDir() @@ -350,4 +392,43 @@ class CidPathTest extends Specification { '1234/a/b/c' | '1234/a/b/c' '' | '/' } + + @Unroll + def 'should validate asString method'() { + expect: + CidPath.asUriString(FIRST, MORE as String[]) == EXPECTED + + where: + FIRST | MORE | EXPECTED + 'foo' | [] | 'cid://foo' + 'foo/' | [] | 'cid://foo' + '/foo' | [] | 'cid://foo' + and: + 'a' | ['/b/'] | 'cid://a/b' + 'a' | ['/b','c'] | 'cid://a/b/c' + 'a' | ['/b','//c'] | 'cid://a/b/c' + 'a' | ['/b/c', 'd'] | 'cid://a/b/c/d' + '/a/' | ['/b/c', 'd'] | 'cid://a/b/c/d' + } + + @Unroll + def 'should check is cid uri string' () { + expect: + CidPath.isCidUri(STR) == EXPECTED + + where: + STR | EXPECTED + null | false + '' | false + 'foo' | false + '/foo' | false + 'cid:/foo' | false + 'cid:foo' | false + 'cid/foo' | false + and: + 'cid://' | true + 'cid:///' | true + 'cid://foo/bar' | true + } + } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy index a90b806b58..5dd46575b5 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy @@ -1,10 +1,18 @@ package nextflow.data.cid.serde import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.Output +import nextflow.data.cid.model.Parameter import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.TaskRun +import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowResults +import nextflow.data.cid.model.WorkflowRun import spock.lang.Specification +import java.time.Instant + class CidEncoderTest extends Specification{ def 'should encode and decode Outputs'(){ @@ -19,13 +27,124 @@ class 
CidEncoderTest extends Specification{ then: object instanceof Output - output.path == "/path/to/file" - output.checksum instanceof Checksum - output.checksum.value == "hash_value" - output.checksum.algorithm == "hash_algorithm" - output.checksum.mode == "standard" - output.source == "cid://source" - output.size == 1234 + def result = object as Output + result.path == "/path/to/file" + result.checksum instanceof Checksum + result.checksum.value == "hash_value" + result.checksum.algorithm == "hash_algorithm" + result.checksum.mode == "standard" + result.source == "cid://source" + result.size == 1234 + + } + + def 'should encode and decode WorkflowRuns'(){ + given: + def encoder = new CidEncoder() + and: + def uniqueId = UUID.randomUUID() + def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) + def workflow = new Workflow(mainScript, [], "https://nextflow.io/nf-test/", "123456") + def wfRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) + + when: + def encoded = encoder.encode(wfRun) + def object = encoder.decode(encoded) + + then: + object instanceof WorkflowRun + def result = object as WorkflowRun + result.workflow instanceof Workflow + result.workflow.mainScriptFile instanceof DataPath + result.workflow.mainScriptFile.path == "file://path/to/main.nf" + result.workflow.mainScriptFile.checksum instanceof Checksum + result.workflow.mainScriptFile.checksum.value == "78910" + result.workflow.commitId == "123456" + result.sessionId == uniqueId.toString() + result.name == "test_run" + result.params.size() == 2 + result.params.get(0).name == "param1" + } + + def 'should encode and decode WorkflowResults'(){ + given: + def encoder = new CidEncoder() + and: + def time = Instant.now().toString() + def wfResults = new WorkflowResults(time, "cid://1234", [a: "A", b: "B"]) + when: + def encoded = encoder.encode(wfResults) + 
def object = encoder.decode(encoded) + + then: + object instanceof WorkflowResults + def result = object as WorkflowResults + result.creationTime == time + result.runId == "cid://1234" + result.outputs == [a: "A", b: "B"] + } + + def 'should encode and decode TaskRun'() { + given: + def encoder = new CidEncoder() + and: + def uniqueId = UUID.randomUUID() + def taskRun = new TaskRun( + uniqueId.toString(),"name", new Checksum("78910", "nextflow", "standard"), + [new Parameter("String", "param1", "value1")], "container:version", "conda", "spack", "amd64", + [a: "A", b: "B"], [new DataPath("path/to/file", new Checksum("78910", "nextflow", "standard"))] + ) + when: + def encoded = encoder.encode(taskRun) + def object = encoder.decode(encoded) + then: + object instanceof TaskRun + def result = object as TaskRun + result.sessionId == uniqueId.toString() + result.name == "name" + result.codeChecksum.value == "78910" + result.inputs.size() == 1 + result.inputs.get(0).name == "param1" + result.container == "container:version" + result.conda == "conda" + result.spack == "spack" + result.architecture == "amd64" + result.globalVars == [a: "A", b: "B"] + result.binEntries.size() == 1 + result.binEntries.get(0).path == "path/to/file" + result.binEntries.get(0).checksum.value == "78910" + } + + def 'should encode and decode TaskResults'(){ + given: + def encoder = new CidEncoder() + and: + def time = Instant.now().toString() + def wfResults = new WorkflowResults(time, "cid://1234", [a: "A", b: "B"]) + when: + def encoded = encoder.encode(wfResults) + def object = encoder.decode(encoded) + + then: + object instanceof WorkflowResults + def result = object as WorkflowResults + result.creationTime == time + result.runId == "cid://1234" + result.outputs == [a: "A", b: "B"] + } + + def 'object with null date attributes' () { + given: + def encoder = new CidEncoder() + and: + def wfResults = new WorkflowResults(null, "cid://1234") + when: + def encoded = encoder.encode(wfResults) + def 
object = encoder.decode(encoded) + then: + encoded == '{"type":"WorkflowResults","creationTime":null,"runId":"cid://1234","outputs":null}' + def result = object as WorkflowResults + result.creationTime == null } } diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy index 1fd34ccc29..47d583c368 100644 --- a/modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy +++ b/modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy @@ -42,6 +42,8 @@ abstract class GsonEncoder implements Encoder { private boolean prettyPrint + private boolean serializeNulls + private volatile Gson gson protected GsonEncoder() { @@ -58,6 +60,11 @@ abstract class GsonEncoder implements Encoder { return this } + GsonEncoder withSerializeNulls(boolean value) { + this.serializeNulls = value + return this + } + private Gson gson0() { if( gson ) return gson @@ -77,6 +84,8 @@ abstract class GsonEncoder implements Encoder { builder.registerTypeAdapterFactory(factory) if( prettyPrint ) builder.setPrettyPrinting() + if( serializeNulls ) + builder.serializeNulls() return builder.create() } diff --git a/plugins/nf-amazon/src/test/nextflow/extension/PublishOpS3Test.groovy b/plugins/nf-amazon/src/test/nextflow/extension/PublishOpS3Test.groovy index 2a0e87e5c0..f6b104d578 100644 --- a/plugins/nf-amazon/src/test/nextflow/extension/PublishOpS3Test.groovy +++ b/plugins/nf-amazon/src/test/nextflow/extension/PublishOpS3Test.groovy @@ -41,7 +41,7 @@ class PublishOpS3Test extends BaseSpec { getBucketDir() >> BUCKET_DIR } - def op = new PublishOp(sess, Mock(DataflowReadChannel), [to:'/target']) + def op = new PublishOp(sess, 'name', Mock(DataflowReadChannel), [to:'/target']) when: def result = op.getTaskDir( BASE.resolve('xx/yyyy/this/and/that.txt') ) diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy 
index edfb211ed5..d29f1dcd48 100644 --- a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy +++ b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy @@ -17,6 +17,8 @@ package nextflow.data.cid.h2 +import groovy.json.JsonSlurper + import java.sql.Clob import com.zaxxer.hikari.HikariDataSource @@ -25,6 +27,7 @@ import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.data.cid.CidHistoryLog import nextflow.data.cid.CidStore +import nextflow.data.cid.CidUtils import nextflow.data.cid.serde.CidEncoder import nextflow.data.cid.serde.CidSerializable import nextflow.data.config.DataConfig @@ -49,6 +52,7 @@ class H2CidStore implements CidStore { dataSource = createDataSource(config.store) // create the db tables createDbTables(dataSource) + createAlias(dataSource) return this } @@ -92,6 +96,14 @@ class H2CidStore implements CidStore { } } + static void createAlias(HikariDataSource dataSource){ + try(final sql=new Sql(dataSource)) { + sql.execute(""" + CREATE ALIAS IF NOT EXISTS JSON_MATCH FOR "nextflow.data.cid.h2.H2CidStore.matchesJsonQuery" + """) + } + } + @Override void save(String key, CidSerializable object) { final value = encoder.encode(object) @@ -121,11 +133,34 @@ class H2CidStore implements CidStore { return new H2CidHistoryLog(dataSource) } + @Override + List search(String queryString) { + final results= new LinkedList() + try(final sql=new Sql(dataSource)) { + sql.eachRow("SELECT metadata FROM cid_file WHERE JSON_MATCH(metadata, ?)", List.of(queryString)) { row -> + results.add(encoder.decode(toValue(row['metadata']) as String)) + } + } + return results + } + /** + * JSON_MATCH implementation for h2 + * @param jsonString + * @param queryString + * @return + */ + static boolean matchesJsonQuery(String jsonString, String queryString) { + def json = new JsonSlurper().parseText(jsonString) + def conditions = CidUtils.parseQuery(queryString) + return CidUtils.checkParams(json, conditions) + } + @Override 
void close() { dataSource.close() } + @TestOnly void truncateAllTables() { try(final sql=new Sql(dataSource)) { diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy index db346b7767..a6c67b91ae 100644 --- a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy +++ b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy @@ -18,10 +18,18 @@ package nextflow.data.cid.h2 import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.Parameter import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowOutput +import nextflow.data.cid.model.WorkflowRun import nextflow.data.config.DataConfig import spock.lang.Shared import spock.lang.Specification + +import java.time.Instant + /** * * @author Paolo Di Tommaso @@ -50,4 +58,28 @@ class H2CidStoreTest extends Specification { store.load('/some/key').toString() == value.toString() } + def 'should query' () { + given: + def uniqueId = UUID.randomUUID() + def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) + def workflow = new Workflow(mainScript, [], "https://nextflow.io/nf-test/", "123456") + def time = Instant.ofEpochMilli(1234567).toString() + def key = "testKey" + def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) + def key2 = "testKey2" + def value2 = new WorkflowOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key1: "value1", key2: "value2"]) + def key3 = "testKey3" + def value3 = new WorkflowOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key2: "value2", key3: "value3"]) + def key4 = "testKey4" + def value4 = 
new WorkflowOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key3: "value3", key4: "value4"]) + + store.save(key, value1) + store.save(key2, value2) + store.save(key3, value3) + store.save(key4, value4) + when: + def results = store.search("type=WorkflowOutput&annotations.key2=value2") + then: + results.size() == 2 + } } diff --git a/settings.gradle b/settings.gradle index a08ed47b49..c64c9887fb 100644 --- a/settings.gradle +++ b/settings.gradle @@ -26,6 +26,7 @@ rootProject.name = 'nextflow-prj' include 'nextflow' include 'nf-commons' include 'nf-httpfs' +include 'nf-lang' include 'nf-cid' include 'nf-lang' From 14e28419f4d95219c3672335d15946decdb705d0 Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Sat, 5 Apr 2025 17:26:16 +0200 Subject: [PATCH 31/72] Cid store quick wins (#5945) Signed-off-by: jorgee Signed-off-by: Paolo Di Tommaso Co-authored-by: Paolo Di Tommaso --- .../main/groovy/nextflow/cli/CmdCid.groovy | 1 + .../groovy/nextflow/cli/CmdCidTest.groovy | 41 +++++---- .../nextflow/data/cid/CidHistoryFile.groovy | 69 ++++---------- .../nextflow/data/cid/CidHistoryLog.groovy | 10 +- .../nextflow/data/cid/CidHistoryRecord.groovy | 9 +- .../main/nextflow/data/cid/CidObserver.groovy | 92 ++++++++++++------- .../main/nextflow/data/cid/CidUtils.groovy | 80 ++++++++++++---- .../data/cid/cli/CidCommandImpl.groovy | 18 +--- .../data/cid/fs/CidFileSystemProvider.groovy | 14 +-- ...ultsPath.groovy => CidMetadataPath.groovy} | 8 +- .../main/nextflow/data/cid/fs/CidPath.groovy | 66 +++++++++---- .../{Output.groovy => DataOutput.groovy} | 9 +- .../nextflow/data/cid/model/TaskOutput.groovy | 32 ------- ...{TaskResults.groovy => TaskOutputs.groovy} | 10 +- .../nextflow/data/cid/model/TaskRun.groovy | 4 +- .../data/cid/model/WorkflowOutput.groovy | 33 ------- ...wResults.groovy => WorkflowOutputs.groovy} | 9 +- .../data/cid/model/WorkflowRun.groovy | 1 + .../nextflow/data/cid/serde/CidEncoder.groovy | 26 +++--- 
.../data/cid/CidHistoryFileTest.groovy | 23 ++--- .../data/cid/CidHistoryRecordTest.groovy | 7 +- .../nextflow/data/cid/CidObserverTest.groovy | 42 +++++---- .../nextflow/data/cid/CidUtilsTest.groovy | 2 +- .../data/cid/DefaultCidStoreTest.groovy | 17 ++-- .../cid/fs/CidFileSystemProviderTest.groovy | 10 +- .../nextflow/data/cid/fs/CidPathTest.groovy | 24 ++--- .../data/cid/serde/CidEncoderTest.groovy | 52 ++++++----- .../nextflow/serde/gson/InstantAdapter.groovy | 6 ++ .../data/cid/h2/H2CidHistoryLog.groovy | 29 +----- .../nextflow/data/cid/h2/H2CidStore.groovy | 1 - .../data/cid/h2/H2CidHistoryLogTest.groovy | 38 +------- .../data/cid/h2/H2CidStoreTest.groovy | 15 ++- 32 files changed, 373 insertions(+), 425 deletions(-) rename modules/nf-cid/src/main/nextflow/data/cid/fs/{CidResultsPath.groovy => CidMetadataPath.groovy} (90%) rename modules/nf-cid/src/main/nextflow/data/cid/model/{Output.groovy => DataOutput.groovy} (88%) delete mode 100644 modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutput.groovy rename modules/nf-cid/src/main/nextflow/data/cid/model/{TaskResults.groovy => TaskOutputs.groovy} (86%) delete mode 100644 modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy rename modules/nf-cid/src/main/nextflow/data/cid/model/{WorkflowResults.groovy => WorkflowOutputs.groovy} (86%) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index dfa47d4347..48c384bd23 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -77,6 +77,7 @@ class CmdCid extends CmdBase implements UsageAware { @Override void run() { if( !args ) { + usage(List.of()) return } // setup the plugins system and load the secrets provider diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy index f94f7376e3..0f65e0f053 
100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -27,9 +27,8 @@ import nextflow.data.cid.CidHistoryRecord import nextflow.data.cid.CidStoreFactory import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.DataOutput import nextflow.data.cid.model.TaskRun -import nextflow.data.cid.model.WorkflowOutput import nextflow.plugin.Plugins import org.junit.Rule import spock.lang.Specification @@ -77,7 +76,7 @@ class CmdCidTest extends Specification { def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } - def recordEntry = "${CidHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tcid://123456\tcid://456789".toString() + def recordEntry = "${CidHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tcid://123456".toString() historyFile.text = recordEntry when: def cidCmd = new CmdCid(launcher: launcher, args: ["log"]) @@ -136,10 +135,10 @@ class CmdCidTest extends Specification { def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } - def time = Instant.ofEpochMilli(123456789).toString() + def time = Instant.ofEpochMilli(123456789) def encoder = new CidEncoder().withPrettyPrint(true) - def entry = new WorkflowOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", 1234, time, time, null) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam","cid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer cidFile.text = jsonSer @@ -208,22 +207,26 @@ class CmdCidTest extends Specification { Files.createDirectories(cidFile4.parent) Files.createDirectories(cidFile5.parent) def encoder = new CidEncoder() - def time = 
Instant.ofEpochMilli(123456789).toString() - def entry = new WorkflowOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", 1234, time, time, null) + def time = Instant.ofEpochMilli(123456789) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", "cid://45678", 1234, time, time, null) cidFile.text = encoder.encode(entry) - entry = new TaskOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987", 1234, time, time, null) + entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987", "cid://123987", 1234, time, time, null) cidFile2.text = encoder.encode(entry) - entry = new TaskRun("u345-2346-1stw2", "foo", new Checksum("abcde2345","nextflow","standard"), + entry = new TaskRun("u345-2346-1stw2", "foo", + new Checksum("abcde2345","nextflow","standard"), + new Checksum("abfsc2375","nextflow","standard"), [new Parameter( "ValueInParam", "sample_id","ggal_gut"), new Parameter("FileInParam","reads",["cid://45678/output.txt"])], null, null, null, null, [:],[], null) cidFile3.text = encoder.encode(entry) - entry = new TaskOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://45678", 1234, time, time, null) + entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://45678", "cid://45678", 1234, time, time, null) cidFile4.text = encoder.encode(entry) - entry = new TaskRun("u345-2346-1stw2", "bar", new Checksum("abfs2556","nextflow","standard"), + entry = new TaskRun("u345-2346-1stw2", "bar", + new Checksum("abfs2556","nextflow","standard"), + new Checksum("abfsc2375","nextflow","standard"), null,null, null, null, null, [:],[], null) cidFile5.text = encoder.encode(entry) final network = """flowchart BT @@ -275,14 +278,14 @@ class CmdCidTest extends Specification { getOptions() >> new CliOptions(config: [configFile.toString()]) } def encoder = new 
CidEncoder().withPrettyPrint(true) - def time = Instant.ofEpochMilli(123456789).toString() - def entry = new WorkflowOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", 1234, time, time, null) + def time = Instant.ofEpochMilli(123456789) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", "cid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer cidFile.text = jsonSer when: - def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid:///?type=WorkflowOutput"]) + def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid:///?type=DataOutput"]) cidCmd.run() def stdout = capture .toString() diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy index d8217f6186..cfac30f820 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy @@ -16,20 +16,22 @@ */ package nextflow.data.cid -import groovy.util.logging.Slf4j - import java.nio.channels.FileChannel import java.nio.channels.FileLock import java.nio.file.Files import java.nio.file.Path import java.nio.file.StandardOpenOption +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.extension.FilesEx /** * File to store a history of the workflow executions and their corresponding CIDs * * @author Jorge Ejarque */ @Slf4j +@CompileStatic class CidHistoryFile implements CidHistoryLog { Path path @@ -38,13 +40,13 @@ class CidHistoryFile implements CidHistoryLog { this.path = file } - void write(String name, UUID key, String runCid, String resultsCid, Date date = null) { + void write(String name, UUID key, String runCid, Date date = null) { assert key withFileLock { def timestamp = date ?: new Date() - log.trace("Writting record for $key in CID history 
file $this") - path << new CidHistoryRecord(timestamp, name, key, runCid, resultsCid).toString() << '\n' + log.trace("Writting record for $key in CID history file ${FilesEx.toUriString(this.path)}") + path << new CidHistoryRecord(timestamp, name, key, runCid).toString() << '\n' } } @@ -55,18 +57,7 @@ class CidHistoryFile implements CidHistoryLog { withFileLock { updateRunCid0(sessionId, runCid) } } catch (Throwable e) { - log.warn "Can't update CID history file: $this", e.message - } - } - - void updateResultsCid(UUID sessionId, String resultsCid) { - assert sessionId - - try { - withFileLock { updateResultsCid0(sessionId, resultsCid) } - } - catch (Throwable e) { - log.warn "Can't update CID history file: $this", e.message + log.warn "Can't update CID history file: ${FilesEx.toUriString(this.path)}", e.message } } @@ -76,7 +67,7 @@ class CidHistoryFile implements CidHistoryLog { withFileLock { this.path.eachLine {list.add(CidHistoryRecord.parse(it)) } } } catch (Throwable e) { - log.warn "Can't read records from CID history file: $this", e.message + log.warn "Can't read records from CID history file: ${FilesEx.toUriString(this.path)}", e.message } return list } @@ -91,7 +82,7 @@ class CidHistoryFile implements CidHistoryLog { return current } } - log.warn("Can't find session $id in CID history file $this") + log.warn("Can't find session $id in CID history file ${FilesEx.toUriString(this.path)}") return null } @@ -100,43 +91,19 @@ class CidHistoryFile implements CidHistoryLog { assert id def newHistory = new StringBuilder() - this.path.readLines().each { line -> - try { - def current = line ? 
CidHistoryRecord.parse(line) : null - if (current.sessionId == id) { - log.trace("Updating record for $id in CID history file $this") - final newRecord = new CidHistoryRecord(current.timestamp, current.runName, current.sessionId, runCid, current.resultsCid) - newHistory << newRecord.toString() << '\n' - } else { - newHistory << line << '\n' - } - } - catch (IllegalArgumentException e) { - log.warn("Can't read CID history file: $this", e.message) - } - } - - // rewrite the history content - this.path.setText(newHistory.toString()) - } - - private void updateResultsCid0(UUID id, String resultsCid) { - assert id - def newHistory = new StringBuilder() - - this.path.readLines().each { line -> + for( String line : this.path.readLines()) { try { def current = line ? CidHistoryRecord.parse(line) : null if (current.sessionId == id) { - log.trace("Updating record for $id in CID history file $this") - final newRecord = new CidHistoryRecord(current.timestamp, current.runName, current.sessionId, current.runCid, resultsCid) + log.trace("Updating record for $id in CID history file ${FilesEx.toUriString(this.path)}") + final newRecord = new CidHistoryRecord(current.timestamp, current.runName, current.sessionId, runCid) newHistory << newRecord.toString() << '\n' } else { newHistory << line << '\n' } } catch (IllegalArgumentException e) { - log.warn("Can't read CID history file: $this", e.message) + log.warn("Can't read CID history file: ${FilesEx.toUriString(this.path)}", e.message) } } @@ -161,11 +128,11 @@ class CidHistoryFile implements CidHistoryLog { try { fos = FileChannel.open(file, StandardOpenOption.WRITE, StandardOpenOption.CREATE) } catch (UnsupportedOperationException e){ - log.warn("File System Provider for ${this.path} do not support file locking. 
Continuing without lock...") + log.warn("File System Provider for ${this.path} do not support file locking - Attemting without locking", e) return action.call() } if (!fos){ - throw new IllegalStateException("Can't create a file channel for ${this.path.toAbsolutePath()}") + throw new IllegalStateException("Can't create a file channel for ${FilesEx.toUriString(this.path)}") } try { Throwable error @@ -178,7 +145,7 @@ class CidHistoryFile implements CidHistoryLog { if (System.currentTimeMillis() - ts < 1_000) sleep rnd.nextInt(75) else { - error = new IllegalStateException("Can't lock file: ${this.path.toAbsolutePath()} -- Nextflow needs to run in a file system that supports file locks") + error = new IllegalStateException("Can't lock file: ${FilesEx.toUriString(this.path)} - Nextflow needs to run in a file system that supports file locks") break } } @@ -200,4 +167,4 @@ class CidHistoryFile implements CidHistoryLog { file.delete() } } -} \ No newline at end of file +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy index bc541b7760..3b71e911e8 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy @@ -30,7 +30,7 @@ interface CidHistoryLog { * @param runCid Workflow run CID. * @param resultsCid Workflow results CID. */ - void write(String name, UUID sessionId, String runCid, String resultsCid) + void write(String name, UUID sessionId, String runCid) /** * Updates the run CID for a given session ID. @@ -40,14 +40,6 @@ interface CidHistoryLog { */ void updateRunCid(UUID sessionId, String runCid) - /** - * Updates the results CID for a given session ID. - * - * @param sessionId Workflow session ID. - * @param resultsCid Workflow results CID. - */ - void updateResultsCid(UUID sessionId, String resultsCid) - /** * Get the store records in the CidHistoryLog. 
* diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy index 744b114e22..544ee26ac5 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy @@ -35,14 +35,12 @@ class CidHistoryRecord { final String runName final UUID sessionId final String runCid - final String resultsCid - CidHistoryRecord(Date timestamp, String name, UUID sessionId, String runCid, String resultsCid = null) { + CidHistoryRecord(Date timestamp, String name, UUID sessionId, String runCid) { this.timestamp = timestamp this.runName = name this.sessionId = sessionId this.runCid = runCid - this.resultsCid = resultsCid } CidHistoryRecord(UUID sessionId, String name = null) { @@ -58,7 +56,6 @@ class CidHistoryRecord { line << (runName ?: '-') line << (sessionId.toString()) line << (runCid ?: '-') - line << (resultsCid ?: '-') } @Override @@ -71,8 +68,8 @@ class CidHistoryRecord { if (cols.size() == 2) return new CidHistoryRecord(UUID.fromString(cols[0])) - if (cols.size() == 5) { - return new CidHistoryRecord(TIMESTAMP_FMT.parse(cols[0]), cols[1], UUID.fromString(cols[2]), cols[3], cols[4]) + if (cols.size() == 4) { + return new CidHistoryRecord(TIMESTAMP_FMT.parse(cols[0]), cols[1], UUID.fromString(cols[2]), cols[3]) } throw new IllegalArgumentException("Not a valid history entry: `$line`") diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index 8ffcb5960a..a93c4cc19a 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -30,11 +30,10 @@ import nextflow.Session import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.TaskOutput -import 
nextflow.data.cid.model.TaskResults +import nextflow.data.cid.model.DataOutput +import nextflow.data.cid.model.TaskOutputs import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutput -import nextflow.data.cid.model.WorkflowResults +import nextflow.data.cid.model.WorkflowOutputs import nextflow.data.cid.model.WorkflowRun import nextflow.data.cid.serde.CidEncoder import nextflow.file.FileHelper @@ -64,7 +63,7 @@ class CidObserver implements TraceObserver { private String executionHash private CidStore store private Session session - private WorkflowResults workflowResults + private WorkflowOutputs workflowResults private Map outputsStoreDirCid = new HashMap(10) private CidEncoder encoder = new CidEncoder() @@ -75,7 +74,7 @@ class CidObserver implements TraceObserver { @Override void onFlowCreate(Session session) { - this.store.getHistoryLog().write(session.runName, session.uniqueId, '-', '-') + this.store.getHistoryLog().write(session.runName, session.uniqueId, '-') } @TestOnly @@ -85,8 +84,8 @@ class CidObserver implements TraceObserver { void onFlowBegin() { executionHash = storeWorkflowRun() final executionUri = asUriString(executionHash) - workflowResults = new WorkflowResults( - Instant.now().toString(), + workflowResults = new WorkflowOutputs( + Instant.now(), executionUri, new HashMap() ) @@ -96,10 +95,9 @@ class CidObserver implements TraceObserver { @Override void onFlowComplete(){ if (this.workflowResults){ - workflowResults.creationTime = System.currentTimeMillis() - final key = CacheHelper.hasher(workflowResults).hash().toString() - this.store.save("${key}", workflowResults) - this.store.getHistoryLog().updateResultsCid(session.uniqueId, asUriString(key)) + workflowResults.createdAt = Instant.now() + final key = executionHash + SEPARATOR + 'outputs' + this.store.save(key, workflowResults) } } @@ -165,23 +163,33 @@ class CidObserver implements TraceObserver { // store the task run entry storeTaskRun(task, pathNormalizer) // store 
all task results - storeTaskResults(task) + storeTaskResults(task, pathNormalizer) } - protected String storeTaskResults(TaskRun task ){ + protected String storeTaskResults(TaskRun task, PathNormalizer normalizer){ + final outputParams = getNormalizedTaskOutputs(task, normalizer) + final value = new TaskOutputs( asUriString(task.hash.toString()), asUriString(executionHash), Instant.now(), outputParams ) + final key = CacheHelper.hasher(value).hash().toString() + store.save(key,value) + return key + } + + private List getNormalizedTaskOutputs( TaskRun task, PathNormalizer normalizer){ final outputs = task.getOutputs() final outputParams = new LinkedList() outputs.forEach { OutParam key, Object value -> if (key instanceof FileOutParam) { - outputParams.add(new Parameter(key.class.simpleName, key.name, manageFileOutParams(value, task))) + outputParams.add( new Parameter( key.class.simpleName, key.name, manageFileOutParams(value, task) ) ) } else { - outputParams.add(new Parameter(key.class.simpleName, key.name, value) ) + if( value instanceof Path ) + outputParams.add( new Parameter( key.class.simpleName, key.name, normalizer.normalizePath( value as Path ) ) ) + else if ( value instanceof CharSequence ) + outputParams.add( new Parameter( key.class.simpleName, key.name, normalizer.normalizePath( value.toString() ) ) ) + else + outputParams.add( new Parameter( key.class.simpleName, key.name, value) ) } } - final value = new TaskResults(asUriString(task.hash.toString()), asUriString(executionHash), Instant.now().toString(), outputParams) - final key = CacheHelper.hasher(value).hash().toString() - store.save(key,value) - return key + return outputParams } private Object manageFileOutParams( Object value, TaskRun task) { @@ -200,10 +208,13 @@ class CidObserver implements TraceObserver { protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { final codeChecksum = new Checksum(CacheHelper.hasher(session.stubRun ? 
task.stubSource: task.source).hash().toString(), "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) + final scriptChecksum = new Checksum(CacheHelper.hasher(task.script).hash().toString(), + "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) final value = new nextflow.data.cid.model.TaskRun( session.uniqueId.toString(), task.getName(), codeChecksum, + scriptChecksum, task.inputs ? manageInputs(task.inputs, normalizer): null, task.isContainerEnabled() ? task.getContainerFingerprint(): null, normalizer.normalizePath(task.getCondaEnv()), @@ -214,7 +225,8 @@ class CidObserver implements TraceObserver { normalizer.normalizePath(p.normalize()), new Checksum(CacheHelper.hasher(p).hash().toString(), "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) ) - } + }, + asUriString(executionHash) ) // store in the underlying persistence @@ -226,15 +238,14 @@ class CidObserver implements TraceObserver { protected String storeTaskOutput(TaskRun task, Path path) { try { final attrs = readAttributes(path) - final rel = getTaskRelative(task, path) - final cid = "${task.hash}/${rel}" - final key = cid.toString() + final key = getTaskOutputKey(task, path) final checksum = new Checksum( CacheHelper.hasher(path).hash().toString(), "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase() ) - final value = new TaskOutput( + final value = new DataOutput( path.toUriString(), checksum, asUriString(task.hash.toString()), + asUriString(task.hash.toString()), attrs.size(), CidUtils.toDate(attrs?.creationTime()), CidUtils.toDate(attrs?.lastModifiedTime())) @@ -242,9 +253,21 @@ class CidObserver implements TraceObserver { return key } catch (Throwable e) { log.warn("Exception storing CID output $path for task ${task.name}. 
${e.getLocalizedMessage()}") + return path.toUriString() } } + protected String getTaskOutputKey(TaskRun task, Path path) { + final rel = getTaskRelative(task, path) + return task.hash.toString() + SEPARATOR + 'outputs' + SEPARATOR + rel + } + + protected String getWorkflowOutputKey(Path destination) { + final rel = getWorkflowRelative(destination) + return executionHash + SEPARATOR + 'outputs' + SEPARATOR + rel + + } + protected String getTaskRelative(TaskRun task, Path path){ if (path.isAbsolute()) { final rel = getTaskRelative0(task, path) @@ -292,20 +315,18 @@ class CidObserver implements TraceObserver { "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase() ) - final rel = getWorkflowRelative(destination) - final key = "$executionHash/${rel}" - + final key = getWorkflowOutputKey(destination) final sourceReference = source ? getSourceReference(source) : asUriString(executionHash) final attrs = readAttributes(destination) - final value = new WorkflowOutput( + final value = new DataOutput( destination.toUriString(), checksum, sourceReference, + asUriString(executionHash), attrs.size(), CidUtils.toDate(attrs?.creationTime()), CidUtils.toDate(attrs?.lastModifiedTime()), annotations) - value.publishedBy = asUriString(executionHash) store.save(key, value) } catch (Throwable e) { log.warn("Exception storing published file $destination for workflow ${executionHash}.", e) @@ -316,7 +337,7 @@ class CidObserver implements TraceObserver { final hash = FileHelper.getTaskHashFromPath(source, session.workDir) if (hash) { final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() - return asUriString(hash.toString(), target) + return asUriString(hash.toString(), 'outputs', target) } final storeDirReference = outputsStoreDirCid.get(source.toString()) return storeDirReference ? 
asUriString(storeDirReference) : null @@ -334,8 +355,13 @@ class CidObserver implements TraceObserver { private Object convertPathsToCidReferences(Object value){ if( value instanceof Path ) { - final rel = getWorkflowRelative(value) - return rel ? asUriString(executionHash, rel) : value + try { + final key = getWorkflowOutputKey(value) + return asUriString(key) + } catch (Throwable e){ + //Workflow output key not found + return value + } } if( value instanceof Collection ) { diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy index 96f37858a9..4a448c26b8 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy @@ -20,9 +20,12 @@ package nextflow.data.cid import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.data.cid.fs.CidPath +import nextflow.data.cid.model.WorkflowRun +import nextflow.data.cid.model.TaskRun +import nextflow.data.cid.serde.CidEncoder import nextflow.data.cid.serde.CidSerializable +import nextflow.serde.gson.GsonEncoder -import java.nio.file.Path import java.nio.file.attribute.FileTime import java.time.Instant @@ -102,31 +105,53 @@ class CidUtils { final object = store.load(key) if (object) { if (children && children.size() > 0) { - final output = navigate(object, children.join('.')) + final output = getSubObject(store, key, object, children) if (output) { treatObject(output, params, results) } else { - throw new FileNotFoundException("Cid object $key/${children ? 
children.join('/') : ''} not found.") + throw new FileNotFoundException("Cid object $key#${children.join('.')} not found.") } } else { treatObject(object, params, results) } } else { - // If there isn't metadata check the parent to check if it is a subfolder of a task/workflow output - final currentPath = Path.of(key) - final parent = currentPath.getParent() - if (parent) { - ArrayList newChildren = new ArrayList() - newChildren.add(currentPath.getFileName().toString()) - newChildren.addAll(children) - return searchPath(store, parent.toString(), params, newChildren as String[]) - } else { - throw new FileNotFoundException("Cid object $key/${children ? children.join('/') : ''} not found.") - } + throw new FileNotFoundException("Cid object $key not found.") } return results } + /** + * Get a metadata sub-object. + * If the requested sub-object is the workflow or task outputs, retrieves the outputs from the outputs description. + * + * @param store CidStore to retrieve metadata objects. + * @param key Parent metadata key. + * @param object Parent object. + * @param children Array of string in indicating the properties to navigate to get the sub-object. + * @return Sub-object or null in it does not exist. + */ + static Object getSubObject(CidStore store, String key, CidSerializable object, String[] children) { + if( isSearchingOutputs(object, children) ) { + // When asking for a Workflow or task output retrieve the outputs description + final outputs = store.load("${key}/outputs") + if (outputs) + return navigate(outputs, children.join('.')) + else + return null + } + return navigate(object, children.join('.')) + } + /** + * Check if the Cid pseudo path or query is for Task or Workflow outputs. + * + * @param object Parent Cid metadata object + * @param children Array of string in indicating the properties to navigate to get the sub-object. + * @return return 'true' if the parent is a Task/Workflow run and the first element in children is 'outputs'. 
Otherwise 'false' + */ + public static boolean isSearchingOutputs(CidSerializable object, String[] children) { + return (object instanceof WorkflowRun || object instanceof TaskRun) && children && children[0] == 'outputs' + } + /** * Evaluates object or the objects in a collection matches a set of parameter-value pairs. It includes in the results collection in case of match. * @param object Object or collection of objects to evaluate @@ -210,11 +235,11 @@ class CidUtils { * Helper function to convert from FileTime to ISO 8601. * * @param time File time to convert - * @return ISO Date format or 'N/A' in case of not available (null) + * @return Instant or null in case of not available (null) */ - static String toDate(FileTime time){ + static Instant toDate(FileTime time){ if (time) - return Instant.ofEpochMilli(time.toMillis()).toString() + return Instant.ofEpochMilli(time.toMillis()) else return null } @@ -230,4 +255,25 @@ class CidUtils { return null return FileTime.from(Instant.parse(date)) } + + /** + * Helper function to unify the encoding of outputs when querying and navigating the CID pseudoFS. + * Outputs can include CidSerializable objects, collections or parts of these objects. + * CidSerializable objects can be encoded with the CidEncoder, but collections or parts of + * these objects require to extend the GsonEncoder. 
+ * + * @param output Output to encode + * @return Output encoded as a JSON string + */ + static String encodeSearchOutputs(Object output, boolean prettyPrint) { + if (output instanceof CidSerializable){ + return new CidEncoder().withPrettyPrint(prettyPrint).encode(output) + } else { + return new GsonEncoder() {} + .withPrettyPrint(prettyPrint) + .withSerializeNulls(true) + .withTypeAdapterFactory(CidEncoder.newCidTypeAdapterFactory()) + .encode(output) + } + } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy index ebe19eebfa..78b95e6d9d 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy @@ -32,16 +32,11 @@ import nextflow.data.cid.CidHistoryRecord import nextflow.data.cid.CidStore import nextflow.data.cid.CidStoreFactory import nextflow.data.cid.CidUtils -import nextflow.data.cid.model.Output +import nextflow.data.cid.model.DataOutput import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.TaskOutput import nextflow.data.cid.model.TaskRun -import nextflow.data.cid.model.WorkflowOutput import nextflow.data.cid.model.WorkflowRun -import nextflow.data.cid.serde.CidEncoder -import nextflow.data.cid.serde.CidSerializable import nextflow.script.params.FileInParam -import nextflow.serde.gson.GsonEncoder import nextflow.ui.TableBuilder import org.eclipse.jgit.diff.DiffAlgorithm import org.eclipse.jgit.diff.DiffFormatter @@ -81,7 +76,6 @@ class CidCommandImpl implements CmdCid.CidCommand { .head('RUN NAME') .head('SESSION ID') .head('RUN CID') - .head('RESULT CID') for( CidHistoryRecord record: records ){ table.append(record.toList()) } @@ -101,10 +95,7 @@ class CidCommandImpl implements CmdCid.CidCommand { def entries = CidUtils.query(store, new URI(args[0])) if( entries ) { entries = entries.size() == 1 ? 
entries[0] : entries - if (entries instanceof CidSerializable) - println new CidEncoder().withPrettyPrint(true).encode(entries as CidSerializable) - else - println new GsonEncoder(){}.withPrettyPrint(true).encode(entries) + println CidUtils.encodeSearchOutputs(entries, true) } else { println "No entries found for ${args[0]}." } @@ -153,10 +144,9 @@ class CidCommandImpl implements CmdCid.CidCommand { final key = nodeToRender.substring(CID_PROT.size()) final cidObject = store.load(key) switch (cidObject.getClass()) { - case TaskOutput: - case WorkflowOutput: + case DataOutput: lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); - final source = (cidObject as Output).source + final source = (cidObject as DataOutput).source if (source) { if (isCidUri(source)) { nodes.add(source) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy index 378b9da42b..7761a3bb7d 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -113,8 +113,8 @@ class CidFileSystemProvider extends FileSystemProvider { InputStream newInputStream(Path path, OpenOption... 
options) throws IOException { final cid = toCidPath(path) final realPath = cid.getTargetPath(true) - if (realPath instanceof CidResultsPath) - return (realPath as CidResultsPath).newInputStream() + if (realPath instanceof CidMetadataPath) + return (realPath as CidMetadataPath).newInputStream() else return realPath.fileSystem.provider().newInputStream(realPath, options) } @@ -131,8 +131,8 @@ class CidFileSystemProvider extends FileSystemProvider { } final realPath = cid.getTargetPath(true) SeekableByteChannel channel - if (realPath instanceof CidResultsPath){ - channel = (realPath as CidResultsPath).newSeekableByteChannel() + if (realPath instanceof CidMetadataPath){ + channel = (realPath as CidMetadataPath).newSeekableByteChannel() } else { channel = realPath.fileSystem.provider().newByteChannel(realPath, options, attrs) } @@ -300,7 +300,7 @@ class CidFileSystemProvider extends FileSystemProvider { throw new AccessDeniedException("Execute mode not supported") } final real = cid.getTargetPath(true) - if (real instanceof CidResultsPath) + if (real instanceof CidMetadataPath) return real.fileSystem.provider().checkAccess(real, modes) } @@ -314,8 +314,8 @@ class CidFileSystemProvider extends FileSystemProvider { A readAttributes(Path path, Class type, LinkOption... 
options) throws IOException { final cid = toCidPath(path) final real = cid.getTargetPath(true) - if (real instanceof CidResultsPath) - return (real as CidResultsPath).readAttributes(type) + if (real instanceof CidMetadataPath) + return (real as CidMetadataPath).readAttributes(type) else return real.fileSystem.provider().readAttributes(real,type,options) } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidResultsPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy similarity index 90% rename from modules/nf-cid/src/main/nextflow/data/cid/fs/CidResultsPath.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy index bb3d791fdc..1c98ca3f84 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidResultsPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy @@ -24,17 +24,17 @@ import java.nio.file.attribute.BasicFileAttributes import java.nio.file.attribute.FileTime /** - * Class to model the metadata results description as a file. + * Class to model the metadata descriptions as a file. * * @author Jorge Ejarque */ @CompileStatic -class CidResultsPath extends CidPath { +class CidMetadataPath extends CidPath { private byte[] results private FileTime creationTime - CidResultsPath (String resultsObject, FileTime creationTime, CidFileSystem fs, String path, String[] childs) { - super(fs, path, childs) + CidMetadataPath(String resultsObject, FileTime creationTime, CidFileSystem fs, String path, String[] childs) { + super(fs, "${path}${childs ? 
'#'+ childs.join('.') : ''}") this.results = resultsObject.getBytes("UTF-8") this.creationTime = creationTime } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy index c287fe46d4..4364eb97ca 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy @@ -18,12 +18,9 @@ package nextflow.data.cid.fs import groovy.util.logging.Slf4j -import nextflow.data.cid.CidUtils -import nextflow.data.cid.model.Output -import nextflow.data.cid.serde.CidEncoder +import nextflow.data.cid.model.DataOutput import nextflow.data.cid.serde.CidSerializable import nextflow.file.RealPathAware -import nextflow.serde.gson.GsonEncoder import nextflow.util.CacheHelper import nextflow.util.TestOnly @@ -31,6 +28,7 @@ import java.nio.file.attribute.FileTime import java.time.Instant import static nextflow.data.cid.fs.CidFileSystemProvider.* +import static nextflow.data.cid.CidUtils.* import java.nio.file.FileSystem import java.nio.file.LinkOption @@ -115,7 +113,7 @@ class CidPath implements Path, RealPathAware { return first } - private static void validateHash(Output cidObject) { + private static void validateHash(DataOutput cidObject) { final hashedPath = FileHelper.toCanonicalPath(cidObject.path as String) if( !hashedPath.exists() ) throw new FileNotFoundException("Target path $cidObject.path does not exists.") @@ -151,10 +149,9 @@ class CidPath implements Path, RealPathAware { throw new Exception("CID store not found. 
Check Nextflow configuration.") final object = store.load(filePath) if ( object ){ - if( object instanceof Output ) { + if( object instanceof DataOutput ) { return getTargetPathFromOutput(object, children) } - if( resultsAsPath ){ return getMetadataAsTargetPath(object, fs, filePath, children) } @@ -166,7 +163,8 @@ class CidPath implements Path, RealPathAware { ArrayList newChildren = new ArrayList() newChildren.add(currentPath.getFileName().toString()) newChildren.addAll(children) - return findTarget(fs, parent.toString(), resultsAsPath, newChildren as String[]) + //resultsAsPath set to false because parent paths are only inspected for DataOutputs + return findTarget(fs, parent.toString(), false, newChildren as String[]) } } throw new FileNotFoundException("Target path $filePath does not exists.") @@ -174,20 +172,49 @@ class CidPath implements Path, RealPathAware { protected static Path getMetadataAsTargetPath(CidSerializable results, CidFileSystem fs, String filePath, String[] children){ if( results ) { - def creationTime = CidUtils.toFileTime(CidUtils.navigate(results, 'creationTime') as String) ?: FileTime.from(Instant.now()) if( children && children.size() > 0 ) { - final output = CidUtils.navigate(results, children.join('.')) - if( output ){ - return new CidResultsPath(new GsonEncoder(){}.withPrettyPrint(true).encode(output), creationTime, fs, filePath, children) - } + return getSubObjectAsPath(fs, filePath, results, children) + }else { + return generateCidMetadataPath(fs, filePath, results, children) } - return new CidResultsPath(new CidEncoder().withPrettyPrint(true).encode(results), creationTime, fs, filePath, children) } throw new FileNotFoundException("Target path $filePath does not exists.") } - private static Path getTargetPathFromOutput(Output object, String[] children) { - final cidObject = object as Output + /** + * Get a metadata sub-object as CidMetadataPath. 
+ * If the requested sub-object is the workflow or task outputs, retrieves the outputs from the outputs description. + * + * @param fs CidFilesystem for the te. + * @param key Parent metadata key. + * @param object Parent object. + * @param children Array of string in indicating the properties to navigate to get the sub-object. + * @return CidMetadataPath or null in it does not exist. + */ + static CidMetadataPath getSubObjectAsPath(CidFileSystem fs, String key, CidSerializable object, String[] children) { + if( isSearchingOutputs(object, children) ) { + // When asking for a Workflow or task output retrieve the outputs description + final outputs = fs.cidStore.load("${key}/outputs") + if( outputs ) { + return generateCidMetadataPath(fs, key, outputs, children) + } else + throw new FileNotFoundException("Target path $key#outputs does not exists.") + } else { + return generateCidMetadataPath(fs, key, object, children) + } + } + + private static CidMetadataPath generateCidMetadataPath(CidFileSystem fs, String key, Object object, String[] children){ + def creationTime = FileTime.from(navigate(object, 'createdAt') as Instant ?: Instant.now()) + final output = children ? navigate(object, children.join('.')) : object + if( output ){ + return new CidMetadataPath(encodeSearchOutputs(output, true), creationTime, fs, key, children) + } + throw new FileNotFoundException("Target path $key#${children.join('.')} does not exists.") + } + + private static Path getTargetPathFromOutput(DataOutput object, String[] children) { + final cidObject = object as DataOutput // return the real path stored in the metadata validateHash(cidObject) def realPath = FileHelper.toCanonicalPath(cidObject.path as String) @@ -264,7 +291,7 @@ class CidPath implements Path, RealPathAware { @Override Path getFileName() { final result = Path.of(filePath).getFileName()?.toString() - return result ? new CidPath(null, result) : null + return result ? 
new CidPath( fragment, query, result, null) : null } @Override @@ -287,6 +314,9 @@ class CidPath implements Path, RealPathAware { if( index<0 ) throw new IllegalArgumentException("Path name index cannot be less than zero - offending value: $index") final path = Path.of(filePath) + if (index == path.nameCount - 1){ + return new CidPath( fragment, query, path.getName(index).toString(), null) + } return new CidPath(index==0 ? fileSystem : null, path.getName(index).toString()) } @@ -395,7 +425,7 @@ class CidPath implements Path, RealPathAware { } protected Path getTargetPath(boolean resultsAsPath=false){ - return findTarget(fileSystem, filePath, resultsAsPath, CidUtils.parseChildrenFormFragment(fragment)) + return findTarget(fileSystem, filePath, resultsAsPath, parseChildrenFormFragment(fragment)) } @Override diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy similarity index 88% rename from modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy index c5d0d94a1c..a2b6d24554 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Output.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy @@ -21,6 +21,8 @@ import groovy.transform.Canonical import groovy.transform.CompileStatic import nextflow.data.cid.serde.CidSerializable +import java.time.Instant + /** * Model a base class for workflow and task outputs * @@ -28,12 +30,13 @@ import nextflow.data.cid.serde.CidSerializable */ @Canonical @CompileStatic -abstract class Output implements CidSerializable { +class DataOutput implements CidSerializable { String path Checksum checksum String source + String run long size - String createdAt - String modifiedAt + Instant createdAt + Instant modifiedAt Map annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutput.groovy 
b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutput.groovy deleted file mode 100644 index f0f2828dff..0000000000 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutput.groovy +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2013-2025, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package nextflow.data.cid.model - -import groovy.transform.Canonical -import groovy.transform.CompileStatic -import groovy.transform.InheritConstructors - -/** - * Model a task output object - * - * @author Paolo Di Tommaso - */ -@Canonical -@CompileStatic -@InheritConstructors -class TaskOutput extends Output { -} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskResults.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy similarity index 86% rename from modules/nf-cid/src/main/nextflow/data/cid/model/TaskResults.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy index b51b0e07f1..a80cf11b64 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskResults.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy @@ -21,6 +21,8 @@ import groovy.transform.Canonical import groovy.transform.CompileStatic import nextflow.data.cid.serde.CidSerializable +import java.time.Instant + /** * Models task results. 
* @@ -28,10 +30,10 @@ import nextflow.data.cid.serde.CidSerializable */ @Canonical @CompileStatic -class TaskResults implements CidSerializable { +class TaskOutputs implements CidSerializable { String taskRun - String runBy - String creationTime + String workflowRun + Instant createdAt List outputs - List annotations + Map annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy index e6b9fbdadc..6711d9616c 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy @@ -32,6 +32,7 @@ class TaskRun implements CidSerializable { String sessionId String name Checksum codeChecksum + Checksum scriptChecksum List inputs String container String conda @@ -39,5 +40,6 @@ class TaskRun implements CidSerializable { String architecture Map globalVars List binEntries - List annotations + String workflowRun + Map annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy deleted file mode 100644 index 8a65951a8d..0000000000 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutput.groovy +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2013-2025, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package nextflow.data.cid.model - -import groovy.transform.Canonical -import groovy.transform.CompileStatic -import groovy.transform.InheritConstructors - -/** - * Model a workflow output object - * - * @author Paolo Di Tommaso - */ -@Canonical -@CompileStatic -@InheritConstructors -class WorkflowOutput extends Output { - String publishedBy -} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy similarity index 86% rename from modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy rename to modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy index 0d308967e5..fa9799d084 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowResults.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy @@ -21,6 +21,8 @@ import groovy.transform.Canonical import groovy.transform.CompileStatic import nextflow.data.cid.serde.CidSerializable +import java.time.Instant + /** * Models the results of a workflow execution. 
* @@ -28,8 +30,9 @@ import nextflow.data.cid.serde.CidSerializable */ @Canonical @CompileStatic -class WorkflowResults implements CidSerializable { - String creationTime - String runId +class WorkflowOutputs implements CidSerializable { + Instant createdAt + String workflowRun Map outputs + Map annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy index d0f76871f1..87f401d7e9 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy @@ -33,4 +33,5 @@ class WorkflowRun implements CidSerializable { String sessionId String name List params + Map annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy b/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy index aa65f56d61..6f23cdbc69 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy @@ -16,17 +16,16 @@ package nextflow.data.cid.serde - import groovy.transform.CompileStatic -import nextflow.data.cid.model.TaskOutput -import nextflow.data.cid.model.TaskResults +import nextflow.data.cid.model.DataOutput +import nextflow.data.cid.model.TaskOutputs import nextflow.data.cid.model.TaskRun import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutput -import nextflow.data.cid.model.WorkflowResults +import nextflow.data.cid.model.WorkflowOutputs import nextflow.data.cid.model.WorkflowRun import nextflow.serde.gson.GsonEncoder import nextflow.serde.gson.RuntimeTypeAdapterFactory + /** * Implements a JSON encoder for CID model objects * @@ -36,16 +35,19 @@ import nextflow.serde.gson.RuntimeTypeAdapterFactory class CidEncoder extends GsonEncoder { CidEncoder() { - withTypeAdapterFactory(RuntimeTypeAdapterFactory.of(CidSerializable.class, "type") + 
withTypeAdapterFactory(newCidTypeAdapterFactory()) + // enable rendering of null values + withSerializeNulls(true) + } + + static RuntimeTypeAdapterFactory newCidTypeAdapterFactory(){ + RuntimeTypeAdapterFactory.of(CidSerializable.class, "type") .registerSubtype(WorkflowRun, WorkflowRun.simpleName) - .registerSubtype(WorkflowResults, WorkflowResults.simpleName) + .registerSubtype(WorkflowOutputs, WorkflowOutputs.simpleName) .registerSubtype(Workflow, Workflow.simpleName) - .registerSubtype(WorkflowOutput, WorkflowOutput.simpleName) .registerSubtype(TaskRun, TaskRun.simpleName) - .registerSubtype(TaskOutput, TaskOutput.simpleName) - .registerSubtype(TaskResults, TaskResults.simpleName) ) - // enable rendering of null values - withSerializeNulls(true) + .registerSubtype(TaskOutputs, TaskOutputs.simpleName) + .registerSubtype(DataOutput, DataOutput.simpleName) } } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryFileTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryFileTest.groovy index ac9d3a627a..c8385bf9c4 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryFileTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryFileTest.groovy @@ -48,10 +48,9 @@ class CidHistoryFileTest extends Specification { UUID sessionId = UUID.randomUUID() String runName = "TestRun" String runCid = "cid://123" - String resultsCid = "cid://456" when: - cidHistoryFile.write(runName, sessionId, runCid, resultsCid) + cidHistoryFile.write(runName, sessionId, runCid) then: def lines = Files.readAllLines(historyFile) @@ -60,7 +59,6 @@ class CidHistoryFileTest extends Specification { parsedRecord.sessionId == sessionId parsedRecord.runName == runName parsedRecord.runCid == runCid - parsedRecord.resultsCid == resultsCid } def "should return correct record for existing session"() { @@ -68,10 +66,9 @@ class CidHistoryFileTest extends Specification { UUID sessionId = UUID.randomUUID() String runName = "Run1" String runCid = "cid://123" - 
String resultsCid = "cid://456" and: - cidHistoryFile.write(runName, sessionId, runCid, resultsCid) + cidHistoryFile.write(runName, sessionId, runCid) when: def record = cidHistoryFile.getRecord(sessionId) @@ -79,7 +76,6 @@ class CidHistoryFileTest extends Specification { record.sessionId == sessionId record.runName == runName record.runCid == runCid - record.resultsCid == resultsCid } def "should return null if session does not exist"() { @@ -87,7 +83,7 @@ class CidHistoryFileTest extends Specification { cidHistoryFile.getRecord(UUID.randomUUID()) == null } - def "update should modify existing Cids for given session"() { + def "update should modify existing Cid for given session"() { given: UUID sessionId = UUID.randomUUID() String runName = "Run1" @@ -95,18 +91,16 @@ class CidHistoryFileTest extends Specification { String resultsCidUpdated = "results-cid-updated" and: - cidHistoryFile.write(runName, sessionId, 'run-cid-initial', 'results-cid-inital') + cidHistoryFile.write(runName, sessionId, 'run-cid-initial') when: cidHistoryFile.updateRunCid(sessionId, runCidUpdated) - cidHistoryFile.updateResultsCid(sessionId, resultsCidUpdated) then: def lines = Files.readAllLines(historyFile) lines.size() == 1 def parsedRecord = CidHistoryRecord.parse(lines[0]) parsedRecord.runCid == runCidUpdated - parsedRecord.resultsCid == resultsCidUpdated } def "update should do nothing if session does not exist"() { @@ -115,19 +109,16 @@ class CidHistoryFileTest extends Specification { UUID nonExistingSessionId = UUID.randomUUID() String runName = "Run1" String runCid = "cid://123" - String resultsCid = "cid://456" and: - cidHistoryFile.write(runName, existingSessionId, runCid, resultsCid) + cidHistoryFile.write(runName, existingSessionId, runCid) when: cidHistoryFile.updateRunCid(nonExistingSessionId, "new-cid") - cidHistoryFile.updateRunCid(nonExistingSessionId, "new-res-cid") then: def lines = Files.readAllLines(historyFile) lines.size() == 1 def parsedRecord = 
CidHistoryRecord.parse(lines[0]) parsedRecord.runCid == runCid - parsedRecord.resultsCid == resultsCid } def 'should get records' () { @@ -135,9 +126,8 @@ class CidHistoryFileTest extends Specification { UUID sessionId = UUID.randomUUID() String runName = "Run1" String runCid = "cid://123" - String resultsCid = "cid://456" and: - cidHistoryFile.write(runName, sessionId, runCid, resultsCid) + cidHistoryFile.write(runName, sessionId, runCid) when: def records = cidHistoryFile.getRecords() @@ -146,7 +136,6 @@ class CidHistoryFileTest extends Specification { records[0].sessionId == sessionId records[0].runName == runName records[0].runCid == runCid - records[0].resultsCid == resultsCid } } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy index 150e6c8bee..6a104dcafc 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy @@ -36,7 +36,7 @@ class CidHistoryRecordTest extends Specification { given: def timestamp = new Date() def formattedTimestamp = CidHistoryRecord.TIMESTAMP_FMT.format(timestamp) - def line = "${formattedTimestamp}\trun-1\t${UUID.randomUUID()}\tcid://123\tcid://456" + def line = "${formattedTimestamp}\trun-1\t${UUID.randomUUID()}\tcid://123" when: def record = CidHistoryRecord.parse(line) @@ -45,19 +45,18 @@ class CidHistoryRecordTest extends Specification { record.timestamp != null record.runName == "run-1" record.runCid == "cid://123" - record.resultsCid == "cid://456" } def "CidRecord toString should produce tab-separated format"() { given: UUID sessionId = UUID.randomUUID() - def record = new CidHistoryRecord(new Date(), "TestRun", sessionId, "cid://123", "cid://456") + def record = new CidHistoryRecord(new Date(), "TestRun", sessionId, "cid://123") when: def line = record.toString() then: line.contains("\t") - line.split("\t").size() == 5 + 
line.split("\t").size() == 4 } } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index 7003b0439b..699700a6bb 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -27,10 +27,9 @@ import com.google.common.hash.HashCode import nextflow.Session import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.DataOutput import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutput -import nextflow.data.cid.model.WorkflowResults +import nextflow.data.cid.model.WorkflowOutputs import nextflow.data.cid.model.WorkflowRun import nextflow.data.cid.serde.CidEncoder import nextflow.data.config.DataConfig @@ -99,6 +98,7 @@ class CidObserverTest extends Specification { } store.open(DataConfig.create(session)) def observer = new CidObserver(session, store) + observer.executionHash = "hash" and: def hash = HashCode.fromInt(123456789) and: @@ -112,15 +112,18 @@ class CidObserverTest extends Specification { getHash() >> hash getProcessor() >> processor getSource() >> 'echo task source' + getScript() >> 'this is the script' } - def sourceHash =CacheHelper.hasher('echo task source').hash().toString() + def sourceHash = CacheHelper.hasher('echo task source').hash().toString() + def scriptHash = CacheHelper.hasher('this is the script').hash().toString() def normalizer = Mock(PathNormalizer.class) { normalizePath( _ as Path) >> {Path p -> p?.toString()} normalizePath( _ as String) >> {String p -> p} } def taskDescription = new nextflow.data.cid.model.TaskRun(uniqueId.toString(), "foo", new Checksum(sourceHash, "nextflow", "standard"), - null, null, null, null, null, [:], [], null ) + new Checksum(scriptHash, "nextflow", "standard"), + null, null, null, null, null, [:], [], 
"cid://hash", null) when: observer.storeTaskRun(task, normalizer) then: @@ -159,15 +162,15 @@ class CidObserverTest extends Specification { } and: def attrs = Files.readAttributes(outFile, BasicFileAttributes) - def output = new TaskOutput(outFile.toString(), new Checksum(fileHash, "nextflow", "standard"), - "cid://15cd5b07", attrs.size(), CidUtils.toDate(attrs.creationTime()), CidUtils.toDate(attrs.lastModifiedTime()) ) + def output = new DataOutput(outFile.toString(), new Checksum(fileHash, "nextflow", "standard"), + "cid://15cd5b07", "cid://15cd5b07", attrs.size(), CidUtils.toDate(attrs.creationTime()), CidUtils.toDate(attrs.lastModifiedTime()) ) and: observer.readAttributes(outFile) >> attrs when: observer.storeTaskOutput(task, outFile) then: - folder.resolve(".meta/${hash}/foo/bar/file.bam/.data.json").text == new CidEncoder().encode(output) + folder.resolve(".meta/${hash}/outputs/foo/bar/file.bam/.data.json").text == new CidEncoder().encode(output) cleanup: folder?.deleteDir() @@ -325,10 +328,10 @@ class CidObserverTest extends Specification { then: 'check file 1 output metadata in cid store' def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes) def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() - def output1 = new WorkflowOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), "cid://123987/file.bam", - attrs1.size(), CidUtils.toDate(attrs1.creationTime()), CidUtils.toDate(attrs1.lastModifiedTime()) ) - output1.setPublishedBy("$CID_PROT${observer.executionHash}".toString()) - folder.resolve(".meta/${observer.executionHash}/foo/file.bam/.data.json").text == encoder.encode(output1) + def output1 = new DataOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), + "cid://123987/outputs/file.bam", "$CID_PROT${observer.executionHash}", + attrs1.size(), CidUtils.toDate(attrs1.creationTime()), CidUtils.toDate(attrs1.lastModifiedTime()) ) + 
folder.resolve(".meta/${observer.executionHash}/outputs/foo/file.bam/.data.json").text == encoder.encode(output1) when: 'publish without source path' def outFile2 = outputDir.resolve('foo/file2.bam') @@ -339,18 +342,17 @@ class CidObserverTest extends Specification { observer.onFilePublish(outFile2) observer.onWorkflowPublish("b", outFile2) then: 'Check outFile2 metadata in cid store' - def output2 = new WorkflowOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), "cid://${observer.executionHash}" , - attrs2.size(), CidUtils.toDate(attrs2.creationTime()), CidUtils.toDate(attrs2.lastModifiedTime()) ) - output2.setPublishedBy("$CID_PROT${observer.executionHash}".toString()) - folder.resolve(".meta/${observer.executionHash}/foo/file2.bam/.data.json").text == encoder.encode(output2) + def output2 = new DataOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), + "cid://${observer.executionHash}" , "cid://${observer.executionHash}", + attrs2.size(), CidUtils.toDate(attrs2.creationTime()), CidUtils.toDate(attrs2.lastModifiedTime()) ) + folder.resolve(".meta/${observer.executionHash}/outputs/foo/file2.bam/.data.json").text == encoder.encode(output2) when: 'Workflow complete' observer.onFlowComplete() then: 'Check history file is updated and Workflow Result is written in the cid store' - def finalCid = store.getHistoryLog().getRecord(uniqueId).resultsCid.substring(CID_PROT.size()) - finalCid != observer.executionHash - def resultsRetrieved = store.load(finalCid) as WorkflowResults - resultsRetrieved.outputs == [a: "cid://${observer.executionHash}/foo/file.bam", b: "cid://${observer.executionHash}/foo/file2.bam"] + def finalCid = store.getHistoryLog().getRecord(uniqueId).runCid.substring(CID_PROT.size()) + def resultsRetrieved = store.load("${finalCid}/outputs") as WorkflowOutputs + resultsRetrieved.outputs == [a: "cid://${observer.executionHash}/outputs/foo/file.bam", b: "cid://${observer.executionHash}/outputs/foo/file2.bam"] 
cleanup: folder?.deleteDir() diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy index 88565b01cf..d473297b25 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy @@ -33,7 +33,7 @@ class CidUtilsTest extends Specification{ where: FILE_TIME | DATE null | null - FileTime.fromMillis(1234) | Instant.ofEpochMilli(1234).toString() + FileTime.fromMillis(1234) | Instant.ofEpochMilli(1234) } def 'should convert to FileTime'(){ diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy index 5756dcadad..9a7259b908 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy @@ -18,9 +18,9 @@ package nextflow.data.cid import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.DataOutput import nextflow.data.cid.model.Parameter import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutput import nextflow.data.cid.model.WorkflowRun import java.nio.file.Files @@ -28,7 +28,6 @@ import java.nio.file.Path import java.time.Instant import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.TaskOutput import nextflow.data.cid.serde.CidEncoder import nextflow.data.config.DataConfig import spock.lang.Specification @@ -68,7 +67,7 @@ class DefaultCidStoreTest extends Specification { def "save should store value in the correct file location"() { given: def key = "testKey" - def value = new TaskOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", 1234) + def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://run", 1234) def cidStore = new DefaultCidStore() 
cidStore.open(config) @@ -84,7 +83,7 @@ class DefaultCidStoreTest extends Specification { def "load should retrieve stored value correctly"() { given: def key = "testKey" - def value = new TaskOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", 1234) + def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://run", 1234) def cidStore = new DefaultCidStore() cidStore.open(config) cidStore.save(key, value) @@ -105,17 +104,17 @@ class DefaultCidStoreTest extends Specification { def 'should query' () { given: def uniqueId = UUID.randomUUID() - def time = Instant.ofEpochMilli(1234567).toString() + def time = Instant.ofEpochMilli(1234567) def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) def workflow = new Workflow(mainScript, [],"https://nextflow.io/nf-test/", "123456" ) def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [ new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] ) def key2 = "testKey2" - def value2 = new WorkflowOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key1:"value1", key2:"value2"]) + def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", 1234, time, time, [key1:"value1", key2:"value2"]) def key3 = "testKey3" - def value3 = new WorkflowOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key2:"value2", key3:"value3"]) + def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", 1234, time, time, [key2:"value2", key3:"value3"]) def key4 = "testKey4" - def value4 = new WorkflowOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key3:"value3", 
key4:"value4"]) + def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", 1234, time, time, [key3:"value3", key4:"value4"]) def cidStore = new DefaultCidStore() cidStore.open(config) @@ -125,7 +124,7 @@ class DefaultCidStoreTest extends Specification { cidStore.save(key4, value4) when: - def results3 = cidStore.search("type=WorkflowOutput&annotations.key2=value2") + def results3 = cidStore.search("type=DataOutput&annotations.key2=value2") then: results3.size() == 2 } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy index d03264133d..f7899528b4 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy @@ -127,7 +127,7 @@ class CidFileSystemProviderTest extends Specification { def output = data.resolve("output.txt") output.text = "Hello, World!" outputMeta.mkdirs() - outputMeta.resolve(".data.json").text = '{"type":"WorkflowOutput","path":"'+output.toString()+'"}' + outputMeta.resolve(".data.json").text = '{"type":"DataOutput","path":"'+output.toString()+'"}' Global.session = Mock(Session) { getConfig()>>config } and: @@ -157,7 +157,7 @@ class CidFileSystemProviderTest extends Specification { def output = data.resolve("output.txt") output.text = "Hello, World!" 
outputMeta.mkdirs() - outputMeta.resolve(".data.json").text = '{"type":"WorkflowOutput","path":"'+output.toString()+'"}' + outputMeta.resolve(".data.json").text = '{"type":"DataOutput","path":"'+output.toString()+'"}' Global.session = Mock(Session) { getConfig()>>config } and: @@ -198,7 +198,7 @@ class CidFileSystemProviderTest extends Specification { meta.resolve('12345/output1').mkdirs() meta.resolve('12345/output2').mkdirs() meta.resolve('12345/.data.json').text = '{"type":"TaskRun"}' - meta.resolve('12345/output1/.data.json').text = '{"type":"TaskOutput", "path": "' + output1.toString() + '"}' + meta.resolve('12345/output1/.data.json').text = '{"type":"DataOutput", "path": "' + output1.toString() + '"}' and: def config = [workflow:[data:[store:[location:wdir.toString()]]]] @@ -313,7 +313,7 @@ class CidFileSystemProviderTest extends Specification { output.resolve('abc').text = 'file1' output.resolve('.foo').text = 'file2' meta.resolve('12345/output').mkdirs() - meta.resolve('12345/output/.data.json').text = '{"type":"TaskOutput", "path": "' + output.toString() + '"}' + meta.resolve('12345/output/.data.json').text = '{"type":"DataOutput", "path": "' + output.toString() + '"}' and: def provider = new CidFileSystemProvider() def cid1 = provider.getPath(CidPath.asUri('cid://12345/output/abc')) @@ -333,7 +333,7 @@ class CidFileSystemProviderTest extends Specification { def file = data.resolve('abc') file.text = 'Hello' meta.resolve('12345/abc').mkdirs() - meta.resolve('12345/abc/.data.json').text = '{"type":"TaskOutput", "path": "' + file.toString() + '"}' + meta.resolve('12345/abc/.data.json').text = '{"type":"DataOutput", "path": "' + file.toString() + '"}' Global.session = Mock(Session) { getConfig()>>config } and: def provider = new CidFileSystemProvider() diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy index 984247e6f1..06bc689e07 100644 --- 
a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy @@ -17,10 +17,10 @@ package nextflow.data.cid.fs -import nextflow.data.cid.model.WorkflowResults +import nextflow.data.cid.CidUtils +import nextflow.data.cid.model.WorkflowOutputs import nextflow.data.cid.serde.CidEncoder import nextflow.file.FileHelper -import nextflow.serde.gson.GsonEncoder import java.nio.file.Files import java.time.Instant @@ -116,21 +116,21 @@ class CidPathTest extends Specification { def outputFile = data.resolve('file2.txt') outputFile.text = "this is file2" - def cidFs = new FileHelper().getOrCreateFileSystemFor('cid', [enabled: true, store: [location: cid.parent.toString()]] ) + def cidFs = new FileHelper().getOrCreateFileSystemFor('cid', [enabled: true, store: [location: cid.parent.toString()]] ) as CidFileSystem cid.resolve('12345/output1').mkdirs() cid.resolve('12345/path/to/file2.txt').mkdirs() cid.resolve('12345/.data.json').text = '{"type":"TaskRun"}' - cid.resolve('12345/output1/.data.json').text = '{"type":"TaskOutput", "path": "' + outputFolder.toString() + '"}' - cid.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"TaskOutput", "path": "' + outputFile.toString() + '"}' - def time = Instant.now().toString() - def wfResultsMetadata = new CidEncoder().withPrettyPrint(true).encode(new WorkflowResults(time, "cid://1234", [a: "cid://1234/a.txt"])) + cid.resolve('12345/output1/.data.json').text = '{"type":"DataOutput", "path": "' + outputFolder.toString() + '"}' + cid.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"DataOutput", "path": "' + outputFile.toString() + '"}' + def time = Instant.now() + def wfResultsMetadata = new CidEncoder().withPrettyPrint(true).encode(new WorkflowOutputs(time, "cid://1234", [a: "cid://1234/a.txt"])) cid.resolve('5678/').mkdirs() cid.resolve('5678/.data.json').text = wfResultsMetadata expect: 'Get real path when CidPath is the output data or a 
subfolder' - new CidPath(cidFs,'12345/output1' ).getTargetPath() == outputFolder - new CidPath(cidFs,'12345/output1/some/path' ).getTargetPath() == outputSubFolder + new CidPath(cidFs, '12345/output1').getTargetPath() == outputFolder + new CidPath(cidFs,'12345/output1/some/path').getTargetPath() == outputSubFolder new CidPath(cidFs,'12345/output1/some/path/file1.txt').getTargetPath().text == outputSubFolderFile.text new CidPath(cidFs, '12345/path/to/file2.txt').getTargetPath().text == outputFile.text @@ -162,14 +162,14 @@ class CidPathTest extends Specification { when: 'Cid description' def result = new CidPath(cidFs, '5678').getTargetPath(true) then: - result instanceof CidResultsPath + result instanceof CidMetadataPath result.text == wfResultsMetadata when: 'Cid description subobject' def result2 = new CidPath(cidFs, '5678#outputs').getTargetPath(true) then: - result2 instanceof CidResultsPath - result2.text == new GsonEncoder(){}.withPrettyPrint(true).encode([a: "cid://1234/a.txt"]) + result2 instanceof CidMetadataPath + result2.text == CidUtils.encodeSearchOutputs([a: "cid://1234/a.txt"], true) when: 'Cid subobject does not exist' new CidPath(cidFs, '23456#notexists').getTargetPath(true) diff --git a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy index 5dd46575b5..e450090754 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy @@ -2,12 +2,12 @@ package nextflow.data.cid.serde import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.Output import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.TaskOutput +import nextflow.data.cid.model.DataOutput +import nextflow.data.cid.model.TaskOutputs import nextflow.data.cid.model.TaskRun import nextflow.data.cid.model.Workflow -import 
nextflow.data.cid.model.WorkflowResults +import nextflow.data.cid.model.WorkflowOutputs import nextflow.data.cid.model.WorkflowRun import spock.lang.Specification @@ -19,15 +19,15 @@ class CidEncoderTest extends Specification{ given: def encoder = new CidEncoder() and: - def output = new TaskOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", 1234) + def output = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://run", 1234) when: def encoded = encoder.encode(output) def object = encoder.decode(encoded) then: - object instanceof Output - def result = object as Output + object instanceof DataOutput + def result = object as DataOutput result.path == "/path/to/file" result.checksum instanceof Checksum result.checksum.value == "hash_value" @@ -70,17 +70,17 @@ class CidEncoderTest extends Specification{ given: def encoder = new CidEncoder() and: - def time = Instant.now().toString() - def wfResults = new WorkflowResults(time, "cid://1234", [a: "A", b: "B"]) + def time = Instant.now() + def wfResults = new WorkflowOutputs(time, "cid://1234", [a: "A", b: "B"]) when: def encoded = encoder.encode(wfResults) def object = encoder.decode(encoded) then: - object instanceof WorkflowResults - def result = object as WorkflowResults - result.creationTime == time - result.runId == "cid://1234" + object instanceof WorkflowOutputs + def result = object as WorkflowOutputs + result.createdAt == time + result.workflowRun == "cid://1234" result.outputs == [a: "A", b: "B"] } @@ -90,7 +90,7 @@ class CidEncoderTest extends Specification{ and: def uniqueId = UUID.randomUUID() def taskRun = new TaskRun( - uniqueId.toString(),"name", new Checksum("78910", "nextflow", "standard"), + uniqueId.toString(),"name", new Checksum("78910", "nextflow", "standard"), new Checksum("74517", "nextflow", "standard"), [new Parameter("String", "param1", "value1")], "container:version", "conda", "spack", 
"amd64", [a: "A", b: "B"], [new DataPath("path/to/file", new Checksum("78910", "nextflow", "standard"))] ) @@ -103,6 +103,7 @@ class CidEncoderTest extends Specification{ result.sessionId == uniqueId.toString() result.name == "name" result.codeChecksum.value == "78910" + result.scriptChecksum.value == "74517" result.inputs.size() == 1 result.inputs.get(0).name == "param1" result.container == "container:version" @@ -119,32 +120,35 @@ class CidEncoderTest extends Specification{ given: def encoder = new CidEncoder() and: - def time = Instant.now().toString() - def wfResults = new WorkflowResults(time, "cid://1234", [a: "A", b: "B"]) + def time = Instant.now() + def parameter = new Parameter("a","b", "c") + def wfResults = new TaskOutputs("cid://1234", "cid://5678", time, [parameter], null) when: def encoded = encoder.encode(wfResults) def object = encoder.decode(encoded) then: - object instanceof WorkflowResults - def result = object as WorkflowResults - result.creationTime == time - result.runId == "cid://1234" - result.outputs == [a: "A", b: "B"] + object instanceof TaskOutputs + def result = object as TaskOutputs + result.createdAt == time + result.taskRun == "cid://1234" + result.workflowRun == "cid://5678" + result.outputs.size() == 1 + result.outputs[0] == parameter } def 'object with null date attributes' () { given: def encoder = new CidEncoder() and: - def wfResults = new WorkflowResults(null, "cid://1234") + def wfResults = new WorkflowOutputs(null, "cid://1234") when: def encoded = encoder.encode(wfResults) def object = encoder.decode(encoded) then: - encoded == '{"type":"WorkflowResults","creationTime":null,"runId":"cid://1234","outputs":null}' - def result = object as WorkflowResults - result.creationTime == null + encoded == '{"type":"WorkflowOutputs","createdAt":null,"workflowRun":"cid://1234","outputs":null,"annotations":null}' + def result = object as WorkflowOutputs + result.createdAt == null } } diff --git 
a/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy index 5a09bec52a..cfed8e5c69 100644 --- a/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy +++ b/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy @@ -16,6 +16,8 @@ package nextflow.serde.gson +import com.google.gson.stream.JsonToken + import java.time.Instant import com.google.gson.TypeAdapter @@ -37,6 +39,10 @@ class InstantAdapter extends TypeAdapter { @Override Instant read(JsonReader reader) throws IOException { + if (reader.peek() == JsonToken.NULL) { + reader.nextNull(); + return null; + } return Instant.parse(reader.nextString()) } } diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy index e112f0daea..56aad878ea 100644 --- a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy +++ b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy @@ -43,14 +43,14 @@ class H2CidHistoryLog implements CidHistoryLog { } @Override - void write(String name, UUID sessionId, String runCid, String resultsCid) { + void write(String name, UUID sessionId, String runCid) { try(final sql=new Sql(dataSource)) { def query = """ - INSERT INTO cid_history_record (timestamp, run_name, session_id, run_cid, results_cid) - VALUES (?, ?, ?, ?, ?) + INSERT INTO cid_history_record (timestamp, run_name, session_id, run_cid) + VALUES (?, ?, ?, ?) 
""" def timestamp = new Timestamp(System.currentTimeMillis()) // Current timestamp - sql.executeInsert(query, List.of(timestamp, name, sessionId.toString(), runCid, resultsCid)) + sql.executeInsert(query, List.of(timestamp, name, sessionId.toString(), runCid)) } } @@ -73,25 +73,6 @@ class H2CidHistoryLog implements CidHistoryLog { } } - @Override - void updateResultsCid(UUID sessionId, String resultsCid) { - try(final sql=new Sql(dataSource)) { - def query = """ - UPDATE cid_history_record - SET results_cid = ? - WHERE session_id = ? - """ - - final count = sql.executeUpdate(query, List.of(resultsCid, sessionId.toString())) - if (count > 0) { - log.debug "Successfully updated run_cid for session_id: $sessionId" - } - else { - log.warn "No record found with session_id: $sessionId" - } - } - } - @Override List getRecords() { try(final sql=new Sql(dataSource)) { @@ -105,7 +86,6 @@ class H2CidHistoryLog implements CidHistoryLog { row.run_name as String, UUID.fromString(row.session_id as String), row.run_cid as String, - row.results_cid as String ) ) } @@ -125,7 +105,6 @@ class H2CidHistoryLog implements CidHistoryLog { row.run_name as String, UUID.fromString(row.session_id as String), row.run_cid as String, - row.results_cid as String ) } } diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy index d29f1dcd48..5856a10839 100644 --- a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy +++ b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy @@ -89,7 +89,6 @@ class H2CidStore implements CidStore { run_name VARCHAR(255) NOT NULL, session_id UUID NOT NULL, run_cid VARCHAR(255) NOT NULL, - results_cid VARCHAR(255) NOT NULL, UNIQUE (run_name, session_id) -- Enforce uniqueness constraint ); ''') diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy 
b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy index 43bd6f527e..e8878bf2fc 100644 --- a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy +++ b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy @@ -49,7 +49,7 @@ class H2CidHistoryLogTest extends Specification { def log = store.getHistoryLog() def uuid = UUID.randomUUID() when: - log.write('foo', uuid, '1234', '4321') + log.write('foo', uuid, '1234') then: noExceptionThrown() @@ -59,7 +59,6 @@ class H2CidHistoryLogTest extends Specification { rec.runName == 'foo' rec.sessionId == uuid rec.runCid == '1234' - rec.resultsCid == '4321' } def 'should update run cid' () { @@ -67,7 +66,7 @@ class H2CidHistoryLogTest extends Specification { def log = store.getHistoryLog() def uuid = UUID.randomUUID() when: - log.write('foo', uuid, '1234', '4321') + log.write('foo', uuid, '1234') then: noExceptionThrown() @@ -82,30 +81,6 @@ class H2CidHistoryLogTest extends Specification { rec.runName == 'foo' rec.sessionId == uuid rec.runCid == '4444' - rec.resultsCid == '4321' - } - - def 'should update results cid' () { - given: - def log = store.getHistoryLog() - def uuid = UUID.randomUUID() - when: - log.write('foo', uuid, '1234', '4321') - then: - noExceptionThrown() - - when: - log.updateResultsCid(uuid, '5555') - then: - noExceptionThrown() - - when: - def rec = log.getRecord(uuid) - then: - rec.runName == 'foo' - rec.sessionId == uuid - rec.runCid == '1234' - rec.resultsCid == '5555' } def 'should update get records' () { @@ -115,9 +90,9 @@ class H2CidHistoryLogTest extends Specification { def uuid2 = UUID.randomUUID() def uuid3 = UUID.randomUUID() when: - log.write('foo1', uuid1, '1', '11') - log.write('foo2', uuid2, '2', '22') - log.write('foo3', uuid3, '3', '33') + log.write('foo1', uuid1, '1') + log.write('foo2', uuid2, '2') + log.write('foo3', uuid3, '3') then: noExceptionThrown() @@ -129,17 +104,14 @@ class H2CidHistoryLogTest extends 
Specification { all[0].runName == 'foo1' all[0].sessionId == uuid1 all[0].runCid == '1' - all[0].resultsCid == '11' and: all[1].runName == 'foo2' all[1].sessionId == uuid2 all[1].runCid == '2' - all[1].resultsCid == '22' and: all[2].runName == 'foo3' all[2].sessionId == uuid3 all[2].runCid == '3' - all[2].resultsCid == '33' } } diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy index a6c67b91ae..e2b6ee3dda 100644 --- a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy +++ b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy @@ -19,10 +19,9 @@ package nextflow.data.cid.h2 import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.DataOutput import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.TaskOutput import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutput import nextflow.data.cid.model.WorkflowRun import nextflow.data.config.DataConfig import spock.lang.Shared @@ -51,7 +50,7 @@ class H2CidStoreTest extends Specification { def 'should store and get a value' () { given: - def value = new TaskOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", 1234) + def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://run", 1234) when: store.save('/some/key', value) then: @@ -63,22 +62,22 @@ class H2CidStoreTest extends Specification { def uniqueId = UUID.randomUUID() def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) def workflow = new Workflow(mainScript, [], "https://nextflow.io/nf-test/", "123456") - def time = Instant.ofEpochMilli(1234567).toString() + def time = Instant.ofEpochMilli(1234567) def key = "testKey" def value1 = new WorkflowRun(workflow, 
uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) def key2 = "testKey2" - def value2 = new WorkflowOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key1: "value1", key2: "value2"]) + def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://run", 1234, time, time, [key1: "value1", key2: "value2"]) def key3 = "testKey3" - def value3 = new WorkflowOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key2: "value2", key3: "value3"]) + def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://run", 1234, time, time, [key2: "value2", key3: "value3"]) def key4 = "testKey4" - def value4 = new WorkflowOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", 1234, time, time, [key3: "value3", key4: "value4"]) + def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://run", 1234, time, time, [key3: "value3", key4: "value4"]) store.save(key, value1) store.save(key2, value2) store.save(key3, value3) store.save(key4, value4) when: - def results = store.search("type=WorkflowOutput&annotations.key2=value2") + def results = store.search("type=DataOutput&annotations.key2=value2") then: results.size() == 2 } From 4073298d1ea65cf31a4bc1541b664df69bb5d861 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sat, 5 Apr 2025 19:49:36 +0200 Subject: [PATCH 32/72] Just blanks [ci skip] Signed-off-by: Paolo Di Tommaso --- modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index a93c4cc19a..c041ef3a3a 100644 --- 
a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -152,7 +152,6 @@ class CidObserver implements TraceObserver { return normalizedParams } - @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { storeTaskInfo(handler.task) @@ -265,7 +264,6 @@ class CidObserver implements TraceObserver { protected String getWorkflowOutputKey(Path destination) { final rel = getWorkflowRelative(destination) return executionHash + SEPARATOR + 'outputs' + SEPARATOR + rel - } protected String getTaskRelative(TaskRun task, Path path){ @@ -281,7 +279,6 @@ class CidObserver implements TraceObserver { throw new Exception("Cannot asses the relative path for output $path of ${task.name}" ) return path.normalize().toString() } - } private String getTaskRelative0(TaskRun task, Path path){ @@ -398,7 +395,6 @@ class CidObserver implements TraceObserver { throw new Exception("Cannot asses the relative path for workflow output $path") return path.normalize().toString() } - } protected List manageInputs(Map inputs, PathNormalizer normalizer) { From 1b991ecb47a17faea31a2c2b0b2e948f16c933c6 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sat, 5 Apr 2025 20:36:22 +0200 Subject: [PATCH 33/72] Clean up Signed-off-by: Paolo Di Tommaso --- .../main/nextflow/data/cid/CidObserver.groovy | 79 ++++++++++--------- .../nextflow/data/cid/CidObserverTest.groovy | 20 +++-- 2 files changed, 53 insertions(+), 46 deletions(-) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index c041ef3a3a..8b8261410b 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -28,14 +28,13 @@ import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.Session import nextflow.data.cid.model.Checksum +import 
nextflow.data.cid.model.DataOutput import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.DataOutput import nextflow.data.cid.model.TaskOutputs import nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowOutputs import nextflow.data.cid.model.WorkflowRun -import nextflow.data.cid.serde.CidEncoder import nextflow.file.FileHelper import nextflow.file.FileHolder import nextflow.processor.TaskHandler @@ -65,7 +64,6 @@ class CidObserver implements TraceObserver { private Session session private WorkflowOutputs workflowResults private Map outputsStoreDirCid = new HashMap(10) - private CidEncoder encoder = new CidEncoder() CidObserver(Session session, CidStore store){ this.session = session @@ -179,7 +177,8 @@ class CidObserver implements TraceObserver { outputs.forEach { OutParam key, Object value -> if (key instanceof FileOutParam) { outputParams.add( new Parameter( key.class.simpleName, key.name, manageFileOutParams(value, task) ) ) - } else { + } + else { if( value instanceof Path ) outputParams.add( new Parameter( key.class.simpleName, key.name, normalizer.normalizePath( value as Path ) ) ) else if ( value instanceof CharSequence ) @@ -191,7 +190,7 @@ class CidObserver implements TraceObserver { return outputParams } - private Object manageFileOutParams( Object value, TaskRun task) { + private Object manageFileOutParams(Object value, TaskRun task) { if (value instanceof Path) { return asUriString(storeTaskOutput(task, (Path) value)) } @@ -202,6 +201,11 @@ class CidObserver implements TraceObserver { } return files } + // unexpected task output + final msg = value!=null + ? 
"Unexepected output [${value.getClass().getName()}] '${value}' for task '${task.name}'" + : "Unexpected output null for task '${task.name}'" + throw new IllegalArgumentException(msg) } protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { @@ -251,7 +255,7 @@ class CidObserver implements TraceObserver { store.save(key, value) return key } catch (Throwable e) { - log.warn("Exception storing CID output $path for task ${task.name}. ${e.getLocalizedMessage()}") + log.warn("Unexpected error storing CID output '${path.toUriString()}' for task '${task.name}'", e) return path.toUriString() } } @@ -269,35 +273,36 @@ class CidObserver implements TraceObserver { protected String getTaskRelative(TaskRun task, Path path){ if (path.isAbsolute()) { final rel = getTaskRelative0(task, path) - if (rel) return rel - throw new Exception("Cannot asses the relative path for output $path of ${task.name}") - } else { - //Check if contains workdir or storeDir - final rel = getTaskRelative0(task, path.toAbsolutePath()) - if (rel) return rel - if (path.normalize().getName(0).toString() == "..") - throw new Exception("Cannot asses the relative path for output $path of ${task.name}" ) - return path.normalize().toString() + if (rel) + return rel + throw new IllegalArgumentException("Cannot access the relative path for output '${path.toUriString()}' and task '${task.name}'") } + //Check if contains workdir or storeDir + final rel = getTaskRelative0(task, path.toAbsolutePath()) + if (rel) return rel + if (path.normalize().getName(0).toString() == "..") + throw new IllegalArgumentException("Cannot access the relative path for output '${path.toUriString()}' and task '${task.name}'" ) + return path.normalize().toString() } private String getTaskRelative0(TaskRun task, Path path){ final workDirAbsolute = task.workDir.toAbsolutePath() - if (path.startsWith(workDirAbsolute)) { - return workDirAbsolute.relativize(path).toString() - } - //If task output is not in the workDir check if 
output is stored in the task's storeDir - final storeDir = task.getConfig().getStoreDir().toAbsolutePath() - if( storeDir && path.startsWith(storeDir)) { - final rel = storeDir.relativize(path) - //If output stored in storeDir, keep the path in case it is used as workflow output - this.outputsStoreDirCid.put(path.toString(), asUriString(task.hash.toString(),rel.toString())) - return rel - } + if (path.startsWith(workDirAbsolute)) { + return workDirAbsolute.relativize(path).toString() + } + //If task output is not in the workDir check if output is stored in the task's storeDir + final storeDir = task.getConfig().getStoreDir().toAbsolutePath() + if( storeDir && path.startsWith(storeDir) ) { + final rel = storeDir.relativize(path) + //If output stored in storeDir, keep the path in case it is used as workflow output + this.outputsStoreDirCid.put(path.toString(), asUriString(task.hash.toString(),rel.toString())) + return rel + } + return null } protected BasicFileAttributes readAttributes(Path path) { - Files.readAttributes(path, BasicFileAttributes) + return Files.readAttributes(path, BasicFileAttributes) } @Override @@ -326,7 +331,7 @@ class CidObserver implements TraceObserver { annotations) store.save(key, value) } catch (Throwable e) { - log.warn("Exception storing published file $destination for workflow ${executionHash}.", e) + log.warn("Unexpected error storing published file '${destination.toUriString()}' for workflow '${executionHash}'", e) } } @@ -383,18 +388,16 @@ class CidObserver implements TraceObserver { if (path.isAbsolute()) { if (path.startsWith(outputDirAbs)) { return outputDirAbs.relativize(path).toString() - } else { - throw new Exception("Cannot asses the relative path for workflow output $path") } - } else { - final pathAbs = path.toAbsolutePath() - if (pathAbs.startsWith(outputDirAbs)) { - return outputDirAbs.relativize(pathAbs).toString() - } - if (path.normalize().getName(0).toString() == "..") - throw new Exception("Cannot asses the relative 
path for workflow output $path") - return path.normalize().toString() + throw new IllegalArgumentException("Cannot access relative path for workflow output '${path.toUriString()}'") + } + final pathAbs = path.toAbsolutePath() + if (pathAbs.startsWith(outputDirAbs)) { + return outputDirAbs.relativize(pathAbs).toString() } + if (path.normalize().getName(0).toString() == "..") + throw new IllegalArgumentException("Cannot access relative path for workflow output '${path.toUriString()}'") + return path.normalize().toString() } protected List manageInputs(Map inputs, PathNormalizer normalizer) { diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index 699700a6bb..1fab4b5824 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -42,6 +42,8 @@ import nextflow.script.WorkflowMetadata import nextflow.util.CacheHelper import nextflow.util.PathNormalizer import spock.lang.Specification +import spock.lang.Unroll + /** * * @author Paolo Di Tommaso @@ -208,7 +210,8 @@ class CidObserverTest extends Specification { Path.of('/path/to/work/12/3456789') | Path.of('storeDir') | Path.of('./relative') | "relative" } - def 'should return exception when relativize task output dirs' (){ + @Unroll + def 'should return exception when relativize task output dirs'() { when: def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] def store = new DefaultCidStore(); @@ -230,8 +233,8 @@ class CidObserverTest extends Specification { def observer = new CidObserver(session, store) observer.getTaskRelative(task, PATH) then: - def e = thrown(Exception) - e.message == "Cannot asses the relative path for output $PATH of ${task.name}".toString() + def e = thrown(IllegalArgumentException) + e.message == "Cannot access the relative path for output '$PATH' and task '${task.name}'".toString() where: 
WORK_DIR | STORE_DIR | PATH @@ -239,7 +242,7 @@ class CidObserverTest extends Specification { Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('../path/to/storeDir/relative') } - def 'should relativise workflow output dirs' (){ + def 'should relativize workflow output dirs' (){ when: def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] def store = new DefaultCidStore(); @@ -259,7 +262,8 @@ class CidObserverTest extends Specification { Path.of('/path/to/outDir') | Path.of('./relative') | "relative" } - def 'should return exception when relativise workflow output dirs' (){ + @Unroll + def 'should return exception when relativize workflow output dirs' (){ when: def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] def store = new DefaultCidStore(); @@ -270,15 +274,15 @@ class CidObserverTest extends Specification { def observer = new CidObserver(session, store) observer.getWorkflowRelative(PATH) then: - def e = thrown(Exception) - e.message == "Cannot asses the relative path for workflow output $PATH" + def e = thrown(IllegalArgumentException) + e.message == "Cannot access relative path for workflow output '$PATH'" where: OUTPUT_DIR | PATH | EXPECTED Path.of('/path/to/outDir') | Path.of('/another/path/') | "relative" Path.of('/path/to/outDir') | Path.of('../relative') | "relative" } - def 'should save workflow output' (){ + def 'should save workflow output'() { given: def folder = Files.createTempDirectory('test') def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] From d6e77a324d478cc8e84d11262dad6fa8679f0b9c Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sat, 5 Apr 2025 20:49:46 +0200 Subject: [PATCH 34/72] Minor [ci fast] Signed-off-by: Paolo Di Tommaso --- .../nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy | 1 + modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy | 1 + .../nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy | 2 +- 3 files 
changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy index 1aa61444a3..34ce676593 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy @@ -23,6 +23,7 @@ import nextflow.trace.TraceObserver import nextflow.trace.TraceObserverFactory /** + * Implements factory for {@link CidObserver} object * * @author Paolo Di Tommaso */ diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy index 613f639662..9d316e999b 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy @@ -22,6 +22,7 @@ import nextflow.data.cid.serde.CidSerializable import nextflow.data.config.DataConfig /** * Interface for the CID store + * * @author Paolo Di Tommaso */ @CompileStatic diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy index e425cb5af1..0e18cff453 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy @@ -26,7 +26,7 @@ import nextflow.util.TestOnly import org.pf4j.ExtensionPoint /** - * Factory for CidStore + * Factory for {@link CidStore} objects * * @author Jorge Ejarque */ From 4058c8e5d58a7fb43b7ac14c284098413d5177b7 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sat, 5 Apr 2025 21:23:51 +0200 Subject: [PATCH 35/72] Add checksum factory [ci fast] Signed-off-by: Paolo Di Tommaso --- .../main/nextflow/data/cid/CidObserver.groovy | 29 +++------- .../nextflow/data/cid/model/Checksum.groovy | 16 +++++- .../data/cid/model/ChecksumTest.groovy | 57 +++++++++++++++++++ 3 files changed, 80 insertions(+), 22 deletions(-) 
create mode 100644 modules/nf-cid/src/test/nextflow/data/cid/model/ChecksumTest.groovy diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index 8b8261410b..cd6c401df3 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -103,7 +103,7 @@ class CidObserver implements TraceObserver { final normalizer = new PathNormalizer(session.workflowMetadata) final mainScript = new DataPath( normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()), - new Checksum(session.workflowMetadata.scriptId, "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) + Checksum.of(session.workflowMetadata.scriptId, "nextflow", CacheHelper.HashMode.DEFAULT()) ) List otherScripts = new LinkedList<>() for (Path p: ScriptMeta.allScriptNames().values()) { @@ -111,11 +111,7 @@ class CidObserver implements TraceObserver { otherScripts.add( new DataPath( normalizer.normalizePath(p.normalize()), - new Checksum( - CacheHelper.hasher(p.text).hash().toString(), - "nextflow", - CacheHelper.HashMode.DEFAULT().toString().toLowerCase() - ) + Checksum.ofNextflow(p.text) ) ) } @@ -209,10 +205,8 @@ class CidObserver implements TraceObserver { } protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { - final codeChecksum = new Checksum(CacheHelper.hasher(session.stubRun ? task.stubSource: task.source).hash().toString(), - "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) - final scriptChecksum = new Checksum(CacheHelper.hasher(task.script).hash().toString(), - "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) + final codeChecksum = Checksum.ofNextflow(session.stubRun ? 
task.stubSource: task.source) + final scriptChecksum = Checksum.ofNextflow(task.script) final value = new nextflow.data.cid.model.TaskRun( session.uniqueId.toString(), task.getName(), @@ -226,8 +220,7 @@ class CidObserver implements TraceObserver { task.processor.getTaskGlobalVars(task), task.processor.getTaskBinEntries(task.source).collect { Path p -> new DataPath( normalizer.normalizePath(p.normalize()), - new Checksum(CacheHelper.hasher(p).hash().toString(), "nextflow", - CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) ) + Checksum.ofNextflow(p) ) }, asUriString(executionHash) ) @@ -242,8 +235,7 @@ class CidObserver implements TraceObserver { try { final attrs = readAttributes(path) final key = getTaskOutputKey(task, path) - final checksum = new Checksum( CacheHelper.hasher(path).hash().toString(), - "nextflow", CacheHelper.HashMode.DEFAULT().toString().toLowerCase() ) + final checksum = Checksum.ofNextflow(path) final value = new DataOutput( path.toUriString(), checksum, @@ -312,11 +304,7 @@ class CidObserver implements TraceObserver { protected void storePublishedFile(Path destination, Path source = null, Map annotations = null){ try { - final checksum = new Checksum( - CacheHelper.hasher(destination).hash().toString(), - "nextflow", - CacheHelper.HashMode.DEFAULT().toString().toLowerCase() - ) + final checksum = Checksum.ofNextflow(destination) final key = getWorkflowOutputKey(destination) final sourceReference = source ? getSourceReference(source) : asUriString(executionHash) final attrs = readAttributes(destination) @@ -419,8 +407,7 @@ class CidObserver implements TraceObserver { final ref = getSourceReference(it.storePath) paths.add(ref ? 
new DataPath(ref) : new DataPath( normalizer.normalizePath(it.storePath), - new Checksum(CacheHelper.hasher(it.storePath).hash().toString(), "nextflow", - CacheHelper.HashMode.DEFAULT().toString().toLowerCase())) + Checksum.ofNextflow(it.storePath)) ) } return paths diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy index 08a91de950..44c212b66c 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy @@ -17,9 +17,11 @@ package nextflow.data.cid.model +import java.nio.file.Path + import groovy.transform.Canonical import groovy.transform.CompileStatic - +import nextflow.util.CacheHelper /** * Models a checksum including the value as well as the algortihm and mode used to compute it. * @@ -31,4 +33,16 @@ class Checksum { String value String algorithm String mode + + static Checksum of(String value, String algorithm, CacheHelper.HashMode mode) { + new Checksum(value, algorithm, mode.toString().toLowerCase()) + } + + static Checksum ofNextflow(String value) { + new Checksum(CacheHelper.hasher(value).hash().toString(), 'nextflow', CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) + } + + static Checksum ofNextflow(Path path) { + new Checksum(CacheHelper.hasher(path).hash().toString(), 'nextflow', CacheHelper.HashMode.DEFAULT().toString().toLowerCase()) + } } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/model/ChecksumTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/model/ChecksumTest.groovy new file mode 100644 index 0000000000..e576c7b310 --- /dev/null +++ b/modules/nf-cid/src/test/nextflow/data/cid/model/ChecksumTest.groovy @@ -0,0 +1,57 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.data.cid.model + +import nextflow.util.CacheHelper +import spock.lang.Specification + +/** + * + * @author Paolo Di Tommaso + */ +class ChecksumTest extends Specification { + + def 'should create a checksum'() { + given: + def checksum = new Checksum(algorithm: 'sha1', value: '1234567890abcdef', mode: 'hex') + + expect: + checksum.algorithm == 'sha1' + checksum.value == '1234567890abcdef' + checksum.mode == 'hex' + } + + def 'should create a checksum with of factory method'() { + given: + def checksum1 = Checksum.of('1234567890abcdef','sha1', CacheHelper.HashMode.DEFAULT()) + + expect: + checksum1.algorithm == 'sha1' + checksum1.value == '1234567890abcdef' + checksum1.mode == 'standard' + } + + def 'should create checksum with ofNextflow factory method'() { + given: + def checksum1 = Checksum.ofNextflow('1234567890abcdef') + + expect: + checksum1.algorithm == 'nextflow' + checksum1.value == CacheHelper.hasher('1234567890abcdef').hash().toString() + checksum1.mode == 'standard' + } +} From ffa3b7e38d6b1a060c49ab507f2f746290b875b6 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sat, 5 Apr 2025 21:58:29 +0200 Subject: [PATCH 36/72] Fix typo [ci skip] Signed-off-by: Paolo Di Tommaso --- modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy index cfac30f820..4b7c0ea5e5 100644 --- 
a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy @@ -128,7 +128,7 @@ class CidHistoryFile implements CidHistoryLog { try { fos = FileChannel.open(file, StandardOpenOption.WRITE, StandardOpenOption.CREATE) } catch (UnsupportedOperationException e){ - log.warn("File System Provider for ${this.path} do not support file locking - Attemting without locking", e) + log.warn("File System Provider for ${this.path} do not support file locking - Attempting without locking", e) return action.call() } if (!fos){ From 9b4addf45ad1fb3d2f69a6e86f0485a8f69e31ae Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sat, 5 Apr 2025 22:17:31 +0200 Subject: [PATCH 37/72] Simplify code [ci fast] Signed-off-by: Paolo Di Tommaso --- .../src/main/nextflow/data/cid/CidUtils.groovy | 18 +++++++++--------- .../test/nextflow/data/cid/CidUtilsTest.groovy | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy index 4a448c26b8..39dccd12e2 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy @@ -37,6 +37,9 @@ import java.time.Instant @Slf4j @CompileStatic class CidUtils { + + private static final String[] EMPTY_ARRAY = new String[] {} + /** * Query a CID store. * @param store CID store to query. @@ -82,16 +85,13 @@ class CidUtils { * @return array with the parsed element */ static String[] parseChildrenFormFragment(String fragment) { - if (fragment) { - if (fragment.contains('.')) { - return fragment.split("\\.") - } else { - return [fragment] as String[] - } - } else { - return [] as String[] - } + if( !fragment ) + return EMPTY_ARRAY + return fragment.contains('.') + ? 
fragment.split("\\.") + : List.of(fragment) as String[] } + /** * Search for objects inside a description * @param store CID store diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy index d473297b25..f55bb6abe7 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy @@ -68,7 +68,7 @@ class CidUtilsTest extends Specification{ def "should parse children elements form Fragment string"() { expect: - CidUtils.parseChildrenFormFragment(FRAGMENT) == EXPECTED + CidUtils.parseChildrenFormFragment(FRAGMENT) == EXPECTED as String[] where: FRAGMENT | EXPECTED From 5692b67a84feab793ef567afb6f1a27c20ad096c Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Mon, 7 Apr 2025 11:22:28 +0200 Subject: [PATCH 38/72] Simplify cid workflow run scripts (#5949) Signed-off-by: Paolo Di Tommaso --- .../main/nextflow/data/cid/CidObserver.groovy | 39 ++++++++----- .../nextflow/data/cid/model/Workflow.groovy | 3 +- .../nextflow/data/cid/CidObserverTest.groovy | 57 ++++++++++++++++++- .../nextflow/data/cid/CidUtilsTest.groovy | 4 +- .../data/cid/DefaultCidStoreTest.groovy | 2 +- .../data/cid/serde/CidEncoderTest.groovy | 10 ++-- .../data/cid/h2/H2CidStoreTest.groovy | 2 +- 7 files changed, 88 insertions(+), 29 deletions(-) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index cd6c401df3..e8e0ca993b 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -99,29 +99,38 @@ class CidObserver implements TraceObserver { } } - protected String storeWorkflowRun() { + protected Collection allScriptFiles() { + return ScriptMeta.allScriptNames().values() + } + + protected List collectScriptDataPaths() { + final allScripts = allScriptFiles() + final result = 
new ArrayList(allScripts.size()+1) final normalizer = new PathNormalizer(session.workflowMetadata) - final mainScript = new DataPath( + // the main script + result.add( new DataPath( normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()), Checksum.of(session.workflowMetadata.scriptId, "nextflow", CacheHelper.HashMode.DEFAULT()) - ) - List otherScripts = new LinkedList<>() - for (Path p: ScriptMeta.allScriptNames().values()) { - if (p && p != session.workflowMetadata.scriptFile) { - otherScripts.add( - new DataPath( - normalizer.normalizePath(p.normalize()), - Checksum.ofNextflow(p.text) - ) - ) - } + ) ) + + // all other scripts + for (Path it: allScripts) { + if( it==null || it == session.workflowMetadata.scriptFile ) + continue + final dataPath = new DataPath(normalizer.normalizePath(it.normalize()), Checksum.ofNextflow(it.text)) + result.add(dataPath) } + return result + } + protected String storeWorkflowRun() { + final normalizer = new PathNormalizer(session.workflowMetadata) + // create the workflow object holding script files and repo tracking info final workflow = new Workflow( - mainScript, - otherScripts, + collectScriptDataPaths(), session.workflowMetadata.repository, session.workflowMetadata.commitId ) + // create the workflow run main object final value = new WorkflowRun( workflow, session.uniqueId.toString(), diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy index 3688b879f7..08c11cacc5 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy @@ -30,8 +30,7 @@ import nextflow.data.cid.serde.CidSerializable @Canonical @CompileStatic class Workflow implements CidSerializable { - DataPath mainScriptFile - List otherScriptFiles + List scriptFiles String repository String commitId } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy 
b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index 1fab4b5824..77697dc532 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -26,8 +26,8 @@ import java.nio.file.attribute.BasicFileAttributes import com.google.common.hash.HashCode import nextflow.Session import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.DataOutput +import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowOutputs import nextflow.data.cid.model.WorkflowRun @@ -38,18 +38,69 @@ import nextflow.processor.TaskId import nextflow.processor.TaskProcessor import nextflow.processor.TaskRun import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta import nextflow.script.WorkflowMetadata import nextflow.util.CacheHelper import nextflow.util.PathNormalizer import spock.lang.Specification import spock.lang.Unroll - /** * * @author Paolo Di Tommaso */ class CidObserverTest extends Specification { + def 'should collect script files' () { + given: + def folder = Files.createTempDirectory('test') + and: + def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] + def store = new DefaultCidStore(); + def uniqueId = UUID.randomUUID() + def scriptFile = folder.resolve("main.nf") + def module1 = folder.resolve("script1.nf"); module1.text = 'hola' + def module2 = folder.resolve("script2.nf"); module2.text = 'world' + and: + + def metadata = Mock(WorkflowMetadata){ + getRepository() >> "https://nextflow.io/nf-test/" + getCommitId() >> "123456" + getScriptId() >> "78910" + getScriptFile() >> scriptFile + getProjectDir() >> folder.resolve("projectDir") + getWorkDir() >> folder.resolve("workDir") + } + def session = Mock(Session) { + getConfig() >> config + getUniqueId() >> uniqueId + getRunName() >> "test_run" + getWorkflowMetadata() >> 
metadata + getParams() >> new ScriptBinding.ParamsMap() + } + store.open(DataConfig.create(session)) + def observer = Spy(new CidObserver(session, store)) + + when: + def files = observer.collectScriptDataPaths() + then: + observer.allScriptFiles() >> [ scriptFile, module1, module2 ] + and: + files.size() == 3 + and: + files[0].path == "file://${scriptFile.toString()}" + files[0].checksum == new Checksum("78910", "nextflow", "standard") + and: + files[1].path == "file://$module1" + files[1].checksum == Checksum.ofNextflow(module1.text) + and: + files[2].path == "file://$module2" + files[2].checksum == Checksum.ofNextflow(module2.text) + + cleanup: + ScriptMeta.reset() + folder?.deleteDir() + } + def 'should save workflow' (){ given: def folder = Files.createTempDirectory('test') @@ -75,7 +126,7 @@ class CidObserverTest extends Specification { store.open(DataConfig.create(session)) def observer = new CidObserver(session, store) def mainScript = new DataPath("file://${scriptFile.toString()}", new Checksum("78910", "nextflow", "standard")) - def workflow = new Workflow(mainScript, [],"https://nextflow.io/nf-test/", "123456" ) + def workflow = new Workflow([mainScript],"https://nextflow.io/nf-test/", "123456" ) def workflowRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", []) when: observer.onFlowCreate(session) diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy index f55bb6abe7..b71accdf3b 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy @@ -50,7 +50,7 @@ class CidUtilsTest extends Specification{ given: def uniqueId = UUID.randomUUID() def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) - def workflow = new Workflow(mainScript, [], "https://nextflow.io/nf-test/", "123456") + def workflow = new Workflow([mainScript], 
"https://nextflow.io/nf-test/", "123456") def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) @@ -149,7 +149,7 @@ class CidUtilsTest extends Specification{ given: def uniqueId = UUID.randomUUID() def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) - def workflow = new Workflow(mainScript, [], "https://nextflow.io/nf-test/", "123456") + def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) def cidStore = new DefaultCidStore() diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy index 9a7259b908..450403002c 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy @@ -106,7 +106,7 @@ class DefaultCidStoreTest extends Specification { def uniqueId = UUID.randomUUID() def time = Instant.ofEpochMilli(1234567) def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) - def workflow = new Workflow(mainScript, [],"https://nextflow.io/nf-test/", "123456" ) + def workflow = new Workflow([mainScript],"https://nextflow.io/nf-test/", "123456" ) def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [ new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] ) def key2 = "testKey2" diff --git a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy index e450090754..a504f66e8a 100644 --- 
a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy @@ -44,7 +44,7 @@ class CidEncoderTest extends Specification{ and: def uniqueId = UUID.randomUUID() def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) - def workflow = new Workflow(mainScript, [], "https://nextflow.io/nf-test/", "123456") + def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") def wfRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) when: @@ -55,10 +55,10 @@ class CidEncoderTest extends Specification{ object instanceof WorkflowRun def result = object as WorkflowRun result.workflow instanceof Workflow - result.workflow.mainScriptFile instanceof DataPath - result.workflow.mainScriptFile.path == "file://path/to/main.nf" - result.workflow.mainScriptFile.checksum instanceof Checksum - result.workflow.mainScriptFile.checksum.value == "78910" + result.workflow.scriptFiles.first instanceof DataPath + result.workflow.scriptFiles.first.path == "file://path/to/main.nf" + result.workflow.scriptFiles.first.checksum instanceof Checksum + result.workflow.scriptFiles.first.checksum.value == "78910" result.workflow.commitId == "123456" result.sessionId == uniqueId.toString() result.name == "test_run" diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy index e2b6ee3dda..8895d7762e 100644 --- a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy +++ b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy @@ -61,7 +61,7 @@ class H2CidStoreTest extends Specification { given: def uniqueId = UUID.randomUUID() def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", 
"standard")) - def workflow = new Workflow(mainScript, [], "https://nextflow.io/nf-test/", "123456") + def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") def time = Instant.ofEpochMilli(1234567) def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) From c9134e4ce8d858d0e51d00283d7c09f3cea9fe7f Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Fri, 11 Apr 2025 21:09:48 +0200 Subject: [PATCH 39/72] Cid store improve coverage, fixes and others (#5956) Signed-off-by: jorgee Co-authored-by: Paolo Di Tommaso --- .../main/groovy/nextflow/cli/CmdCid.groovy | 29 ++ .../groovy/nextflow/cli/CmdCidTest.groovy | 50 +- .../nextflow/data/cid/CidHistoryFile.groovy | 170 ------- .../nextflow/data/cid/CidHistoryRecord.groovy | 9 - .../main/nextflow/data/cid/CidObserver.groovy | 98 ++-- .../data/cid/CidPropertyValidator.groovy | 60 +++ .../main/nextflow/data/cid/CidStore.groovy | 6 +- .../main/nextflow/data/cid/CidUtils.groovy | 161 ++++--- .../data/cid/DefaultCidHistoryLog.groovy | 88 ++++ .../nextflow/data/cid/DefaultCidStore.groovy | 12 +- .../data/cid/cli/CidCommandImpl.groovy | 202 ++++---- .../data/cid/fs/CidFileSystemProvider.groovy | 173 ++++--- .../main/nextflow/data/cid/fs/CidPath.groovy | 70 +-- .../nextflow/data/cid/model/DataOutput.groovy | 33 +- .../nextflow/data/cid/model/DataPath.groovy | 6 + .../data/cid/model/TaskOutputs.groovy | 15 + .../nextflow/data/cid/model/TaskRun.groovy | 39 ++ .../nextflow/data/cid/model/Workflow.groovy | 9 + .../data/cid/model/WorkflowOutputs.groovy | 12 + .../data/cid/model/WorkflowRun.groovy | 15 + .../nextflow/data/cid/CidObserverTest.groovy | 147 +++++- .../data/cid/CidPropertyValidationTest.groovy | 40 ++ .../nextflow/data/cid/CidUtilsTest.groovy | 74 ++- ...groovy => DefaultCidHistoryLogTest.groovy} | 51 +- .../data/cid/DefaultCidStoreTest.groovy | 19 +- 
.../data/cid/cli/CidCommandImplTest.groovy | 450 ++++++++++++++++++ .../cid/fs/CidFileSystemProviderTest.groovy | 134 +++++- .../nextflow/data/cid/fs/CidPathTest.groovy | 236 ++++++++- .../data/cid/serde/CidEncoderTest.groovy | 3 +- .../nextflow/data/cid/h2/H2CidStore.groovy | 8 +- .../data/cid/h2/H2CidStoreTest.groovy | 8 +- 31 files changed, 1878 insertions(+), 549 deletions(-) delete mode 100644 modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy create mode 100644 modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy rename modules/nf-cid/src/test/nextflow/data/cid/{CidHistoryFileTest.groovy => DefaultCidHistoryLogTest.groovy} (66%) create mode 100644 modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy index 48c384bd23..35d064e417 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy @@ -44,6 +44,7 @@ class CmdCid extends CmdBase implements UsageAware { void show(ConfigMap config, List args) void lineage(ConfigMap config, List args) void diff(ConfigMap config, List args) + void find(ConfigMap config, List args) } interface SubCmd { @@ -64,6 +65,7 @@ class CmdCid extends CmdBase implements UsageAware { commands << new CmdShow() commands << new CmdLineage() commands << new CmdDiff() + commands << new CmdFind() } @Parameter(hidden = true) @@ -259,4 +261,31 @@ class CmdCid extends CmdBase implements UsageAware { } + class CmdFind implements SubCmd { + + @Override + String getName() { 'find' } + + @Override + String getDescription() { + return 'Find CID metadata descriptions matching with a query' + } + + void apply(List 
args) { + if (args.size() != 1) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + operation.find(config, args) + } + + @Override + void usage() { + println description + println "Usage: nextflow $NAME $name " + } + + } + } diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy index 0f65e0f053..7b8f5b7c30 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -17,6 +17,7 @@ package nextflow.cli +import nextflow.data.cid.DefaultCidHistoryLog import nextflow.data.cid.serde.CidEncoder import java.nio.file.Files @@ -70,14 +71,14 @@ class CmdCidTest extends Specification { def configFile = folder.resolve('nextflow.config') configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() def historyFile = folder.resolve(".meta/.history") - Files.createDirectories(historyFile.parent) + def cidLog = new DefaultCidHistoryLog(historyFile) def uniqueId = UUID.randomUUID() def date = new Date(); def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } + cidLog.write("run_name", uniqueId, "cid://123456", date) def recordEntry = "${CidHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tcid://123456".toString() - historyFile.text = recordEntry when: def cidCmd = new CmdCid(launcher: launcher, args: ["log"]) cidCmd.run() @@ -138,7 +139,7 @@ class CmdCidTest extends Specification { def time = Instant.ofEpochMilli(123456789) def encoder = new CidEncoder().withPrettyPrint(true) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam","cid://123987/", 1234, time, time, null) + "cid://123987/file.bam","cid://12345/","cid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer cidFile.text 
= jsonSer @@ -209,10 +210,10 @@ class CmdCidTest extends Specification { def encoder = new CidEncoder() def time = Instant.ofEpochMilli(123456789) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://45678", 1234, time, time, null) + "cid://123987/file.bam", "cid://45678",null, 1234, time, time, null) cidFile.text = encoder.encode(entry) entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987", "cid://123987", 1234, time, time, null) + "cid://123987", "cid://45678", "cid://123987", 1234, time, time, null) cidFile2.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "foo", new Checksum("abcde2345","nextflow","standard"), @@ -222,7 +223,7 @@ class CmdCidTest extends Specification { null, null, null, null, [:],[], null) cidFile3.text = encoder.encode(entry) entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://45678", "cid://45678", 1234, time, time, null) + "cid://45678", "cid://45678", null, 1234, time, time, null) cidFile4.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "bar", new Checksum("abfs2556","nextflow","standard"), @@ -280,7 +281,7 @@ class CmdCidTest extends Specification { def encoder = new CidEncoder().withPrettyPrint(true) def time = Instant.ofEpochMilli(123456789) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://123987/", 1234, time, time, null) + "cid://123987/file.bam", "cid://12345", "cid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer cidFile.text = jsonSer @@ -302,4 +303,39 @@ class CmdCidTest extends Specification { folder?.deleteDir() } + def 'should show query results'(){ + given: + def folder = Files.createTempDirectory('test').toAbsolutePath() + def configFile = folder.resolve('nextflow.config') + configFile.text = 
"workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() + def cidFile = folder.resolve(".meta/12345/.data.json") + Files.createDirectories(cidFile.parent) + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + def encoder = new CidEncoder().withPrettyPrint(true) + def time = Instant.ofEpochMilli(123456789) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", "cid://12345", "cid://123987/", 1234, time, time, null) + def jsonSer = encoder.encode(entry) + def expectedOutput = jsonSer + cidFile.text = jsonSer + when: + def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid:///?type=DataOutput"]) + cidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == expectedOutput.readLines().size() + stdout.join('\n') == expectedOutput + + cleanup: + folder?.deleteDir() + } + } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy deleted file mode 100644 index 4b7c0ea5e5..0000000000 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryFile.groovy +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright 2013-2024, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package nextflow.data.cid - -import java.nio.channels.FileChannel -import java.nio.channels.FileLock -import java.nio.file.Files -import java.nio.file.Path -import java.nio.file.StandardOpenOption - -import groovy.transform.CompileStatic -import groovy.util.logging.Slf4j -import nextflow.extension.FilesEx -/** - * File to store a history of the workflow executions and their corresponding CIDs - * - * @author Jorge Ejarque - */ -@Slf4j -@CompileStatic -class CidHistoryFile implements CidHistoryLog { - - Path path - - CidHistoryFile(Path file) { - this.path = file - } - - void write(String name, UUID key, String runCid, Date date = null) { - assert key - - withFileLock { - def timestamp = date ?: new Date() - log.trace("Writting record for $key in CID history file ${FilesEx.toUriString(this.path)}") - path << new CidHistoryRecord(timestamp, name, key, runCid).toString() << '\n' - } - } - - void updateRunCid(UUID sessionId, String runCid) { - assert sessionId - - try { - withFileLock { updateRunCid0(sessionId, runCid) } - } - catch (Throwable e) { - log.warn "Can't update CID history file: ${FilesEx.toUriString(this.path)}", e.message - } - } - - List getRecords(){ - List list = new LinkedList() - try { - withFileLock { this.path.eachLine {list.add(CidHistoryRecord.parse(it)) } } - } - catch (Throwable e) { - log.warn "Can't read records from CID history file: ${FilesEx.toUriString(this.path)}", e.message - } - return list - } - - - CidHistoryRecord getRecord(UUID id) { - assert id - - for (String line : this.path.readLines()) { - def current = line ? 
CidHistoryRecord.parse(line) : null - if (current.sessionId == id) { - return current - } - } - log.warn("Can't find session $id in CID history file ${FilesEx.toUriString(this.path)}") - return null - } - - - private void updateRunCid0(UUID id, String runCid) { - assert id - def newHistory = new StringBuilder() - - for( String line : this.path.readLines()) { - try { - def current = line ? CidHistoryRecord.parse(line) : null - if (current.sessionId == id) { - log.trace("Updating record for $id in CID history file ${FilesEx.toUriString(this.path)}") - final newRecord = new CidHistoryRecord(current.timestamp, current.runName, current.sessionId, runCid) - newHistory << newRecord.toString() << '\n' - } else { - newHistory << line << '\n' - } - } - catch (IllegalArgumentException e) { - log.warn("Can't read CID history file: ${FilesEx.toUriString(this.path)}", e.message) - } - } - - // rewrite the history content - this.path.setText(newHistory.toString()) - } - - /** - * Apply the given action by using a file lock - * - * @param action The closure implementing the action to be executed with a file lock - * @return The value returned by the action closure - */ - protected withFileLock(Closure action) { - - def rnd = new Random() - long ts = System.currentTimeMillis() - final parent = this.path.parent ?: Path.of('.').toAbsolutePath() - Files.createDirectories(parent) - def file = parent.resolve("${this.path.name}.lock".toString()) - FileChannel fos - try { - fos = FileChannel.open(file, StandardOpenOption.WRITE, StandardOpenOption.CREATE) - } catch (UnsupportedOperationException e){ - log.warn("File System Provider for ${this.path} do not support file locking - Attempting without locking", e) - return action.call() - } - if (!fos){ - throw new IllegalStateException("Can't create a file channel for ${FilesEx.toUriString(this.path)}") - } - try { - Throwable error - FileLock lock = null - - try { - while (true) { - lock = fos.tryLock() - if (lock) break - if 
(System.currentTimeMillis() - ts < 1_000) - sleep rnd.nextInt(75) - else { - error = new IllegalStateException("Can't lock file: ${FilesEx.toUriString(this.path)} - Nextflow needs to run in a file system that supports file locks") - break - } - } - if (lock) { - return action.call() - } - } - catch (Exception e) { - return action.call() - } - finally { - if (lock?.isValid()) lock.release() - } - - if (error) throw error - } - finally { - fos.closeQuietly() - file.delete() - } - } -} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy index 544ee26ac5..03390fca62 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy @@ -43,11 +43,6 @@ class CidHistoryRecord { this.runCid = runCid } - CidHistoryRecord(UUID sessionId, String name = null) { - this.runName = name - this.sessionId = sessionId - } - protected CidHistoryRecord() {} List toList() { @@ -65,13 +60,9 @@ class CidHistoryRecord { static CidHistoryRecord parse(String line) { def cols = line.tokenize('\t') - if (cols.size() == 2) - return new CidHistoryRecord(UUID.fromString(cols[0])) - if (cols.size() == 4) { return new CidHistoryRecord(TIMESTAMP_FMT.parse(cols[0]), cols[1], UUID.fromString(cols[2]), cols[3]) } - throw new IllegalArgumentException("Not a valid history entry: `$line`") } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index e8e0ca993b..508fac8f83 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -50,6 +50,7 @@ import nextflow.trace.TraceRecord import nextflow.util.CacheHelper import nextflow.util.PathNormalizer import nextflow.util.TestOnly + /** * Observer to write the generated workflow metadata in a CID store. 
* @@ -64,6 +65,7 @@ class CidObserver implements TraceObserver { private Session session private WorkflowOutputs workflowResults private Map outputsStoreDirCid = new HashMap(10) + private PathNormalizer normalizer CidObserver(Session session, CidStore store){ this.session = session @@ -78,9 +80,16 @@ class CidObserver implements TraceObserver { @TestOnly String getExecutionHash(){ executionHash } + @TestOnly + String setExecutionHash(String hash){ this.executionHash = hash } + + @TestOnly + String setNormalizer(PathNormalizer normalizer){ this.normalizer = normalizer } + @Override void onFlowBegin() { - executionHash = storeWorkflowRun() + normalizer = new PathNormalizer(session.workflowMetadata) + executionHash = storeWorkflowRun(normalizer) final executionUri = asUriString(executionHash) workflowResults = new WorkflowOutputs( Instant.now(), @@ -103,10 +112,9 @@ class CidObserver implements TraceObserver { return ScriptMeta.allScriptNames().values() } - protected List collectScriptDataPaths() { + protected List collectScriptDataPaths(PathNormalizer normalizer) { final allScripts = allScriptFiles() final result = new ArrayList(allScripts.size()+1) - final normalizer = new PathNormalizer(session.workflowMetadata) // the main script result.add( new DataPath( normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()), @@ -122,11 +130,11 @@ class CidObserver implements TraceObserver { } return result } - protected String storeWorkflowRun() { - final normalizer = new PathNormalizer(session.workflowMetadata) + + protected String storeWorkflowRun(PathNormalizer normalizer) { // create the workflow object holding script files and repo tracking info final workflow = new Workflow( - collectScriptDataPaths(), + collectScriptDataPaths(normalizer), session.workflowMetadata.repository, session.workflowMetadata.commitId ) @@ -142,36 +150,39 @@ class CidObserver implements TraceObserver { return executionHash } - private static List getNormalizedParams(Map params, 
PathNormalizer normalizer){ + protected static List getNormalizedParams(Map params, PathNormalizer normalizer){ final normalizedParams = new LinkedList() params.each{String key, Object value -> - if( value instanceof Path ) - normalizedParams.add( new Parameter( Path.class.simpleName, key, normalizer.normalizePath( value as Path ) ) ) - else if ( value instanceof CharSequence ) - normalizedParams.add( new Parameter( String.class.simpleName, key, normalizer.normalizePath( value.toString() ) ) ) - else - normalizedParams.add( new Parameter( value.class.simpleName, key, value) ) + addNormalizedParam(key, value, normalizer, normalizedParams) } return normalizedParams } + private static void addNormalizedParam(String key, Object value, PathNormalizer normalizer, List normalizedParams){ + if( value instanceof Path ) + normalizedParams.add( new Parameter( Path.class.simpleName, key, normalizer.normalizePath( value as Path ) ) ) + else if ( value instanceof CharSequence ) + normalizedParams.add( new Parameter( String.class.simpleName, key, normalizer.normalizePath( value.toString() ) ) ) + else + normalizedParams.add( new Parameter( value.class.simpleName, key, value) ) + } + @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { storeTaskInfo(handler.task) } protected void storeTaskInfo(TaskRun task) { - final pathNormalizer = new PathNormalizer(session.workflowMetadata) // store the task run entry - storeTaskRun(task, pathNormalizer) + storeTaskRun(task, normalizer) // store all task results - storeTaskResults(task, pathNormalizer) + storeTaskResults(task, normalizer) } protected String storeTaskResults(TaskRun task, PathNormalizer normalizer){ final outputParams = getNormalizedTaskOutputs(task, normalizer) final value = new TaskOutputs( asUriString(task.hash.toString()), asUriString(executionHash), Instant.now(), outputParams ) - final key = CacheHelper.hasher(value).hash().toString() + final key = task.hash.toString() + SEPARATOR + 'outputs' 
store.save(key,value) return key } @@ -180,22 +191,28 @@ class CidObserver implements TraceObserver { final outputs = task.getOutputs() final outputParams = new LinkedList() outputs.forEach { OutParam key, Object value -> - if (key instanceof FileOutParam) { - outputParams.add( new Parameter( key.class.simpleName, key.name, manageFileOutParams(value, task) ) ) - } - else { - if( value instanceof Path ) - outputParams.add( new Parameter( key.class.simpleName, key.name, normalizer.normalizePath( value as Path ) ) ) - else if ( value instanceof CharSequence ) - outputParams.add( new Parameter( key.class.simpleName, key.name, normalizer.normalizePath( value.toString() ) ) ) - else - outputParams.add( new Parameter( key.class.simpleName, key.name, value) ) - } + manageTaskOutputParameter(key, outputParams, value, task, normalizer) } return outputParams } - private Object manageFileOutParams(Object value, TaskRun task) { + private void manageTaskOutputParameter(OutParam key, LinkedList outputParams, value, TaskRun task, PathNormalizer normalizer) { + if (key instanceof FileOutParam) { + outputParams.add(new Parameter(key.class.simpleName, key.name, manageFileOutParam(value, task))) + } else { + if (value instanceof Path) + outputParams.add(new Parameter(key.class.simpleName, key.name, normalizer.normalizePath(value as Path))) + else if (value instanceof CharSequence) + outputParams.add(new Parameter(key.class.simpleName, key.name, normalizer.normalizePath(value.toString()))) + else + outputParams.add(new Parameter(key.class.simpleName, key.name, value)) + } + } + + private Object manageFileOutParam(Object value, TaskRun task) { + if (value == null) { + throw new IllegalArgumentException("Unexpected output null for task '${task.name}'") + } if (value instanceof Path) { return asUriString(storeTaskOutput(task, (Path) value)) } @@ -207,10 +224,7 @@ class CidObserver implements TraceObserver { return files } // unexpected task output - final msg = value!=null - ? 
"Unexepected output [${value.getClass().getName()}] '${value}' for task '${task.name}'" - : "Unexpected output null for task '${task.name}'" - throw new IllegalArgumentException(msg) + throw new IllegalArgumentException("Unexpected output [${value.getClass().getName()}] '${value}' for task '${task.name}'") } protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { @@ -221,7 +235,7 @@ class CidObserver implements TraceObserver { task.getName(), codeChecksum, scriptChecksum, - task.inputs ? manageInputs(task.inputs, normalizer): null, + task.inputs ? manageTaskInputParameters(task.inputs, normalizer): null, task.isContainerEnabled() ? task.getContainerFingerprint(): null, normalizer.normalizePath(task.getCondaEnv()), normalizer.normalizePath(task.getSpackEnv()), @@ -249,6 +263,7 @@ class CidObserver implements TraceObserver { path.toUriString(), checksum, asUriString(task.hash.toString()), + asUriString(executionHash), asUriString(task.hash.toString()), attrs.size(), CidUtils.toDate(attrs?.creationTime()), @@ -322,6 +337,7 @@ class CidObserver implements TraceObserver { checksum, sourceReference, asUriString(executionHash), + null, attrs.size(), CidUtils.toDate(attrs?.creationTime()), CidUtils.toDate(attrs?.lastModifiedTime()), @@ -379,8 +395,14 @@ class CidObserver implements TraceObserver { void onFilePublish(Path destination, Path source, Map annotations){ storePublishedFile( destination, source, annotations) } - - protected String getWorkflowRelative(Path path){ + /** + * Relativizes a path from the workflow's output dir. 
+ * + * @param path Path to relativize + * @return Path String with the relative path + * @throws IllegalArgumentException + */ + protected String getWorkflowRelative(Path path) throws IllegalArgumentException{ final outputDirAbs = session.outputDir.toAbsolutePath() if (path.isAbsolute()) { if (path.startsWith(outputDirAbs)) { @@ -397,7 +419,7 @@ class CidObserver implements TraceObserver { return path.normalize().toString() } - protected List manageInputs(Map inputs, PathNormalizer normalizer) { + protected List manageTaskInputParameters(Map inputs, PathNormalizer normalizer) { List managedInputs = new LinkedList() inputs.forEach{ param, value -> final type = param.class.simpleName @@ -414,7 +436,7 @@ class CidObserver implements TraceObserver { final paths = new LinkedList(); for( FileHolder it : files ) { final ref = getSourceReference(it.storePath) - paths.add(ref ? new DataPath(ref) : new DataPath( + paths.add(ref ? ref : new DataPath( normalizer.normalizePath(it.storePath), Checksum.ofNextflow(it.storePath)) ) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy new file mode 100644 index 0000000000..7a6bd674c3 --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy @@ -0,0 +1,60 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.data.cid + +import groovy.util.logging.Slf4j +import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.TaskOutputs +import nextflow.data.cid.model.TaskRun +import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowOutputs +import nextflow.data.cid.model.WorkflowRun + +import java.lang.reflect.Field + +/** + * Class to validate if the string refers to a property in the classes of te CID Metadata model. + * @author Jorge Ejarque + */ +class CidPropertyValidator { + + private static List CID_MODEL_CLASSES = [Workflow, WorkflowRun, WorkflowOutputs, TaskRun, TaskOutputs, DataOutput, DataPath, Parameter, Checksum] + private Set validProperties + + CidPropertyValidator(){ + this.validProperties = new HashSet() + for( Class clazz: CID_MODEL_CLASSES) { + for( Field field: clazz.declaredFields) { + validProperties.add( field.name) + } + } + } + + void validate(String[] properties) { + for(String property: properties) { + if (!(property in this.validProperties)) { + throw new IllegalArgumentException("Property '$property' doesn't exist in the CID model") + } + } + } + + + + +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy index 9d316e999b..bfff2f9928 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy @@ -57,8 +57,10 @@ interface CidStore extends Closeable { /** * Search for cid entries. * @queryString Json-path like query string. (Only simple and nested field operators are supported(No array, wildcards,etc.) 
- * @return List of Cid object's fulfilling the queryString + * @return Map fulfilling the queryString */ - List search(String queryString) + Map search(String queryString) + + } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy index 39dccd12e2..482fdeafde 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy @@ -48,25 +48,13 @@ class CidUtils { * - Key: Element where the query will be applied. '/' indicates query will be applied in all the elements of the CID store. * - QueryString: all param-value pairs that the CID element should fulfill in a URI's query string format. * - Fragment: Element fragment to retrieve. - * @return List of object fulfilling the query + * @return Collection of object fulfilling the query */ - static List query(CidStore store, URI uri) { + static Collection query(CidStore store, URI uri) { String key = uri.authority ? uri.authority + uri.path : uri.path try { if (key == CidPath.SEPARATOR) { - final results = store.search(uri.query) - if (results && uri.fragment){ - // If fragment is defined get the property of the object indicated by the fragment - final filteredResults = [] - results.forEach { - final output = navigate(it, uri.fragment) - if (output){ - filteredResults.add(output) - } - } - return filteredResults - } - return results + return globalSearch(store, uri) } else { final parameters = uri.query ? 
parseQuery(uri.query) : null final children = parseChildrenFormFragment(uri.fragment) @@ -79,6 +67,26 @@ class CidUtils { } + private static Collection globalSearch(CidStore store, URI uri) { + final results = store.search(uri.query).values() + if (results && uri.fragment) { + // If fragment is defined get the property of the object indicated by the fragment + return filterResults(results, uri.fragment) + } + return results + } + + private static List filterResults(Collection results, String fragment) { + final filteredResults = [] + results.forEach { + final output = navigate(it, fragment) + if (output) { + filteredResults.add(output) + } + } + return filteredResults + } + /** * Get the array of the search path children elements from the fragment string * @param fragment String containing the elements separated by '.' @@ -87,9 +95,8 @@ class CidUtils { static String[] parseChildrenFormFragment(String fragment) { if( !fragment ) return EMPTY_ARRAY - return fragment.contains('.') - ? fragment.split("\\.") - : List.of(fragment) as String[] + final children = fragment.tokenize('.') + return children as String[] } /** @@ -101,25 +108,28 @@ class CidUtils { * @return List of object */ protected static List searchPath(CidStore store, String key, Map params, String[] children = []) { - final results = new LinkedList() final object = store.load(key) - if (object) { - if (children && children.size() > 0) { - final output = getSubObject(store, key, object, children) - if (output) { - treatObject(output, params, results) - } else { - throw new FileNotFoundException("Cid object $key#${children.join('.')} not found.") - } - } else { - treatObject(object, params, results) - } - } else { + if (!object) { throw new FileNotFoundException("Cid object $key not found.") } + final results = new LinkedList() + if (children && children.size() > 0) { + treatSubObject(store, key, object, children, params, results) + } else { + treatObject(object, params, results) + } + return results } + 
private static void treatSubObject(CidStore store, String key, CidSerializable object, String[] children, Map params, LinkedList results) { + final output = getSubObject(store, key, object, children) + if (!output) { + throw new FileNotFoundException("Cid object $key#${children.join('.')} not found.") + } + treatObject(output, params, results) + } + /** * Get a metadata sub-object. * If the requested sub-object is the workflow or task outputs, retrieves the outputs from the outputs description. @@ -134,13 +144,13 @@ class CidUtils { if( isSearchingOutputs(object, children) ) { // When asking for a Workflow or task output retrieve the outputs description final outputs = store.load("${key}/outputs") - if (outputs) - return navigate(outputs, children.join('.')) - else + if (!outputs) return null + return navigate(outputs, children.join('.')) } return navigate(object, children.join('.')) } + /** * Check if the Cid pseudo path or query is for Task or Workflow outputs. * @@ -148,7 +158,7 @@ class CidUtils { * @param children Array of string in indicating the properties to navigate to get the sub-object. * @return return 'true' if the parent is a Task/Workflow run and the first element in children is 'outputs'. Otherwise 'false' */ - public static boolean isSearchingOutputs(CidSerializable object, String[] children) { + static boolean isSearchingOutputs(CidSerializable object, String[] children) { return (object instanceof WorkflowRun || object instanceof TaskRun) && children && children[0] == 'outputs' } @@ -175,12 +185,13 @@ class CidUtils { * @return Map containing the parameter-value pairs of the query string. 
*/ static Map parseQuery(String queryString) { - if (queryString) { - return queryString.split('&').collectEntries { - it.split('=').collect { URLDecoder.decode(it, 'UTF-8') } - } as Map + if( !queryString ) { + return [:] } - return [:] + return queryString.split('&').collectEntries { + it.split('=').collect { URLDecoder.decode(it, 'UTF-8') } + } as Map + } /** @@ -189,16 +200,29 @@ class CidUtils { * @param params parameter-value pairs to evaluate * @return true if all object parameters exist and matches with the value, otherwise false. */ - static boolean checkParams(Object object, Map params) { - for (final entry : params.entrySet()) { + static boolean checkParams(Object object, Map params) { + for( final entry : params.entrySet() ) { final value = navigate(object, entry.key) - if (!value || value.toString() != entry.value.toString() ) { + if( !checkParam(value, entry.value) ) { return false } } return true } + private static boolean checkParam(Object value, Object expected) { + if( !value ) + return false + if( value instanceof Collection ) { + for( def v : value as Collection ) { + if( v.toString() == expected.toString() ) + return true + } + return false + } + return value.toString() == expected.toString() + } + /** * Retrieves the sub-object or value indicated by a path. 
* @param obj Object to navigate @@ -208,22 +232,12 @@ class CidUtils { static Object navigate(Object obj, String path){ if (!obj) return null - try{ - // type has been replaced by class when evaluating CidSerializable objects - if (obj instanceof CidSerializable && path == 'type') + // type has been replaced by class when evaluating CidSerializable objects + if (obj instanceof CidSerializable && path == 'type') return obj.getClass()?.simpleName - path.tokenize('.').inject(obj) { current, key -> - if (current == null) return null - - if (current instanceof Map) { - return current[key] // Navigate Map properties - } - - if (current.metaClass.hasProperty(current, key)) { - return current.getAt(key) // Navigate Object properties - } - log.trace("No property found for $key") - return null // Property not found + try{ + return path.tokenize('.').inject(obj) { current, key -> + return getSubPath(current, key) } } catch (Throwable e) { log.debug("Error navigating to $path in object", e) @@ -231,6 +245,37 @@ class CidUtils { } } + private static Object getSubPath(current, String key) { + if (current == null) + return null + + if (current instanceof Map) { + return current[key] // Navigate Map properties + } + if (current instanceof Collection) { + return navigateCollection(current, key) + } + if (current.metaClass.hasProperty(current, key)) { + return current.getAt(key) // Navigate Object properties + } + log.debug("No property found for $key") + return null + } + + private static Object navigateCollection(Collection collection, String key) { + def results = [] + for (Object object: collection){ + final res = getSubPath(object, key) + if (res) results.add(res) + } + if (results.isEmpty() ) { + log.trace("No property found for $key") + return null + } + // Return a single object if only ine results is found. + return results.size() == 1 ? results[0] : results + } + /** * Helper function to convert from FileTime to ISO 8601. 
* diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy new file mode 100644 index 0000000000..6b3bbeda9c --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy @@ -0,0 +1,88 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package nextflow.data.cid + +import java.nio.file.Files +import java.nio.file.Path + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.extension.FilesEx +/** + * File to store a history of the workflow executions and their corresponding CIDs + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +class DefaultCidHistoryLog implements CidHistoryLog { + + Path path + + DefaultCidHistoryLog(Path folder) { + this.path = folder + if( !path.exists() ) + Files.createDirectories(path) + } + + void write(String name, UUID key, String runCid, Date date = null) { + assert key + def timestamp = date ?: new Date() + final recordFile = path.resolve(key.toString()) + try { + recordFile.text = new CidHistoryRecord(timestamp, name, key, runCid).toString() + log.trace("Record for $key written in CID history log ${FilesEx.toUriString(this.path)}") + }catch (Throwable e) { + log.warn("Can't write record $key file ${FilesEx.toUriString(recordFile)}", e.message) + } + } + + void updateRunCid(UUID id, String runCid) { + assert id + 
final recordFile = path.resolve(id.toString()) + try { + def current = CidHistoryRecord.parse(path.resolve(id.toString()).text) + recordFile.text = new CidHistoryRecord(current.timestamp, current.runName, id, runCid).toString() + } + catch (Throwable e) { + log.warn("Can't read session $id file: ${FilesEx.toUriString(recordFile)}", e.message) + } + } + + List getRecords(){ + List list = new LinkedList() + try { + this.path.eachFile { Path file -> list.add(CidHistoryRecord.parse(file.text))} + } + catch (Throwable e) { + log.warn "Exception reading records from CID history folder: ${FilesEx.toUriString(this.path)}", e.message + } + return list.sort {it.timestamp } + } + + CidHistoryRecord getRecord(UUID id) { + assert id + final recordFile = path.resolve(id.toString()) + try { + return CidHistoryRecord.parse(recordFile.text) + } catch( Throwable e ) { + log.warn("Can't find session $id in file: ${FilesEx.toUriString(recordFile)}", e.message) + return null + } + } + +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy index 6540ce5818..c421023ae1 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy @@ -59,7 +59,7 @@ class DefaultCidStore implements CidStore { if( !Files.exists(metaLocation) && !Files.createDirectories(metaLocation) ) { throw new AbortOperationException("Unable to create CID store directory: $metaLocation") } - historyLog = new CidHistoryFile(metaLocation.resolve(HISTORY_FILE_NAME)) + historyLog = new DefaultCidHistoryLog(metaLocation.resolve(HISTORY_FILE_NAME)) return this } @@ -105,18 +105,16 @@ class DefaultCidStore implements CidStore { void close() throws IOException { } @Override - List search(String queryString) { - + Map search(String queryString) { def params = null if (queryString) { params = CidUtils.parseQuery(queryString) } return 
searchAllFiles(params) - } - private List searchAllFiles (Map params) { - final results = new LinkedList() + private Map searchAllFiles (Map params) { + final results = new HashMap() Files.walkFileTree(metaLocation, new FileVisitor() { @@ -130,7 +128,7 @@ class DefaultCidStore implements CidStore { if (file.name.startsWith('.data.json') ) { final cidObject = encoder.decode(file.text) if (CidUtils.checkParams(cidObject, params)){ - results.add(cidObject as CidSerializable) + results.put(metaLocation.relativize(file.getParent()).toString(), cidObject as CidSerializable) } } FileVisitResult.CONTINUE diff --git a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy index 78b95e6d9d..bbda78a659 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy @@ -17,6 +17,8 @@ package nextflow.data.cid.cli +import nextflow.data.cid.serde.CidEncoder + import static nextflow.data.cid.fs.CidPath.* import java.nio.charset.StandardCharsets @@ -42,6 +44,7 @@ import org.eclipse.jgit.diff.DiffAlgorithm import org.eclipse.jgit.diff.DiffFormatter import org.eclipse.jgit.diff.RawText import org.eclipse.jgit.diff.RawTextComparator + /** * Implements CID command line operations * @@ -70,61 +73,61 @@ class CidCommandImpl implements CmdCid.CidCommand { private void printHistory(CidStore store) { final records = store.historyLog?.records - if( records ) { - def table = new TableBuilder(cellSeparator: '\t') - .head('TIMESTAMP') - .head('RUN NAME') - .head('SESSION ID') - .head('RUN CID') - for( CidHistoryRecord record: records ){ - table.append(record.toList()) - } - println table.toString() - } else { + if( !records ) { println("No workflow runs CIDs found.") + return + } + def table = new TableBuilder(cellSeparator: '\t') + .head('TIMESTAMP') + .head('RUN NAME') + .head('SESSION ID') + .head('RUN CID') + for 
(CidHistoryRecord record : records) { + table.append(record.toList()) } + println table.toString() } @Override void show(ConfigMap config, List args) { - if (!isCidUri(args[0])) + if( !isCidUri(args[0]) ) throw new Exception("Identifier is not a CID URL") final store = CidStoreFactory.getOrCreate(new Session(config)) - if (store) { - try { - def entries = CidUtils.query(store, new URI(args[0])) - if( entries ) { - entries = entries.size() == 1 ? entries[0] : entries - println CidUtils.encodeSearchOutputs(entries, true) - } else { - println "No entries found for ${args[0]}." - } - } catch (Throwable e) { - println "Error loading ${args[0]}. ${e.message}" - } - } else { + if ( !store ) { println "Error CID store not loaded. Check Nextflow configuration." + return + } + try { + def entries = CidUtils.query(store, new URI(args[0])) + if( !entries ) { + println "No entries found for ${args[0]}." + return + } + entries = entries.size() == 1 ? entries[0] : entries + println CidUtils.encodeSearchOutputs(entries, true) + } catch (Throwable e) { + println "Error loading ${args[0]}. ${e.message}" } } @Override void lineage(ConfigMap config, List args) { + final store = CidStoreFactory.getOrCreate(new Session(config)) + if( !store ) { + println "Error CID store not loaded. Check Nextflow configuration." + return + } try { - final store = CidStoreFactory.getOrCreate(new Session(config)) - final template = MermaidHtmlRenderer.readTemplate() - final network = getLineage(store, args[0]) - Path file = Path.of(args[1]) - file.text = template.replace('REPLACE_WITH_NETWORK_DATA', network) + renderLineage(store, args[0], Path.of(args[1])) println("Linage graph for ${args[0]} rendered in ${args[1]}") } catch (Throwable e) { println("ERROR: rendering lineage graph. 
${e.message}") } } - private String getLineage(CidStore store, String dataCid) { + private void renderLineage(CidStore store, String dataCid, Path file) { def lines = [] as List lines << "flowchart BT".toString() - final nodesToRender = new LinkedList() nodesToRender.add(dataCid) final edgesToRender = new LinkedList() @@ -135,7 +138,9 @@ class CidCommandImpl implements CmdCid.CidCommand { lines << "" edgesToRender.each { lines << " ${it.source} -->${it.destination}".toString() } lines << "" - return lines.join('\n') + lines.join('\n') + final template = MermaidHtmlRenderer.readTemplate() + file.text = template.replace('REPLACE_WITH_NETWORK_DATA', lines.join('\n')) } private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, CidStore store) { @@ -145,44 +150,15 @@ class CidCommandImpl implements CmdCid.CidCommand { final cidObject = store.load(key) switch (cidObject.getClass()) { case DataOutput: - lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); - final source = (cidObject as DataOutput).source - if (source) { - if (isCidUri(source)) { - nodes.add(source) - edges.add(new Edge(source, nodeToRender)) - } else { - final label = convertToLabel(source) - lines << " ${source}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(source, nodeToRender)) - } - } + processDataOutput(cidObject as DataOutput, lines, nodeToRender, nodes, edges) break; case WorkflowRun: - final wfRun = cidObject as WorkflowRun - lines << "${nodeToRender}@{shape: processes, label: \"${wfRun.name}\"}".toString() - final parameters = wfRun.params - parameters.each { - final label = convertToLabel(it.value.toString()) - lines << " ${it.value.toString()}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(it.value.toString(), nodeToRender)) - } + processWorkflowRun(cidObject as WorkflowRun, lines, nodeToRender, edges) break case TaskRun: - final taskRun = cidObject as TaskRun - lines << " 
${nodeToRender}@{shape: process, label: \"${taskRun.name}\"}".toString() - final parameters = taskRun.inputs - for (Parameter source: parameters){ - if (source.type.equals(FileInParam.simpleName)) { - manageFileInParam(lines, nodeToRender, nodes, edges, source.value) - } else { - final label = convertToLabel(source.value.toString()) - lines << " ${source.value.toString()}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(source.value.toString(), nodeToRender)) - } - } + processTaskRun(cidObject as TaskRun, lines, nodeToRender, nodes, edges) break default: @@ -190,6 +166,45 @@ class CidCommandImpl implements CmdCid.CidCommand { } } + private void processTaskRun(TaskRun taskRun, List lines, String nodeToRender, LinkedList nodes, LinkedList edges) { + lines << " ${nodeToRender}@{shape: process, label: \"${taskRun.name}\"}".toString() + final parameters = taskRun.inputs + for (Parameter source : parameters) { + if (source.type.equals(FileInParam.simpleName)) { + manageFileInParam(lines, nodeToRender, nodes, edges, source.value) + } else { + final label = convertToLabel(source.value.toString()) + lines << " ${source.value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(source.value.toString(), nodeToRender)) + } + } + } + + private void processWorkflowRun(WorkflowRun wfRun, List lines, String nodeToRender, LinkedList edges) { + lines << " ${nodeToRender}@{shape: processes, label: \"${wfRun.name}\"}".toString() + final parameters = wfRun.params + parameters.each { + final label = convertToLabel(it.value.toString()) + lines << " ${it.value.toString()}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(it.value.toString(), nodeToRender)) + } + } + + private void processDataOutput(DataOutput cidObject, List lines, String nodeToRender, LinkedList nodes, LinkedList edges){ + lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); + final source = cidObject.source + 
if(! source ) + return + if (isCidUri(source)) { + nodes.add(source) + edges.add(new Edge(source, nodeToRender)) + } else { + final label = convertToLabel(source) + lines << " ${source}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(source, nodeToRender)) + } + } + private String convertToLabel(String label){ return label.replace('http', 'h\u200Ettp') } @@ -207,7 +222,7 @@ class CidCommandImpl implements CmdCid.CidCommand { return } } - if (value instanceof Map) { + if (value instanceof Map ) { if (value.path) { final path = value.path.toString() if (isCidUri(path)) { @@ -233,26 +248,27 @@ class CidCommandImpl implements CmdCid.CidCommand { throw new Exception("Identifier is not a CID URL") final store = CidStoreFactory.getOrCreate(new Session(config)) - if (store) { - try { - final key1 = args[0].substring(CID_PROT.size()) - final entry1 = store.load(key1) as String - if( !entry1 ){ - println "No entry found for ${args[0]}." - return - } - final key2 = args[1].substring(CID_PROT.size()) - final entry2 = store.load(key2) as String - if( !entry2 ) { - println "No entry found for ${args[1]}." - return - } - generateDiff(entry1, key1, entry2, key2) - } catch (Throwable e) { - println "Error generating diff between ${args[0]}: $e.message" - } - } else { + if (!store) { println "Error CID store not loaded. Check Nextflow configuration." + return + } + try { + final key1 = args[0].substring(CID_PROT.size()) + final entry1 = store.load(key1) + if (!entry1) { + println "No entry found for ${args[0]}." + return + } + final key2 = args[1].substring(CID_PROT.size()) + final entry2 = store.load(key2) + if (!entry2) { + println "No entry found for ${args[1]}." 
+ return + } + final encoder = new CidEncoder().withPrettyPrint(true) + generateDiff(encoder.encode(entry1), key1, encoder.encode(entry2), key2) + } catch (Throwable e) { + println "Error generating diff between ${args[0]}: $e.message" } } @@ -285,5 +301,17 @@ class CidCommandImpl implements CmdCid.CidCommand { println output.toString() } - -} + @Override + void find(ConfigMap config, List args) { + final store = CidStoreFactory.getOrCreate(new Session(config)) + if (!store) { + println "Error CID store not loaded. Check Nextflow configuration." + return + } + try { + println CidUtils.encodeSearchOutputs(store.search(args[0]).keySet().collect {asUriString(it)}, true) + } catch (Throwable e){ + println "Exception searching for ${args[0]}. ${e.message}" + } + } +} \ No newline at end of file diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy index 7761a3bb7d..0ea52625d0 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -18,6 +18,7 @@ package nextflow.data.cid.fs import java.nio.ByteBuffer +import java.nio.channels.NonWritableChannelException import java.nio.channels.SeekableByteChannel import java.nio.file.AccessDeniedException import java.nio.file.AccessMode @@ -59,23 +60,25 @@ class CidFileSystemProvider extends FileSystemProvider { protected CidPath toCidPath(Path path) { if (path !instanceof CidPath) throw new ProviderMismatchException() + if (path instanceof CidMetadataPath) + return (CidMetadataPath) path return (CidPath) path } private void checkScheme(URI uri) { final scheme = uri.scheme.toLowerCase() - if( scheme != getScheme() ) + if (scheme != getScheme()) throw new IllegalArgumentException("Not a valid ${getScheme().toUpperCase()} scheme: $scheme") } @Override synchronized FileSystem newFileSystem(URI uri, Map config) throws 
IOException { checkScheme(uri) - if( !fileSystem ) { + if (!fileSystem) { //Overwrite default values with provided configuration final defaultConfig = DataConfig.asMap() if (config) { - for (Map.Entry e : config.entrySet()) { + for (Map.Entry e : config.entrySet()) { defaultConfig.put(e.key, e.value) } } @@ -93,7 +96,7 @@ class CidFileSystemProvider extends FileSystemProvider { synchronized FileSystem getFileSystemOrCreate(URI uri) { checkScheme(uri) - if( !fileSystem ) { + if (!fileSystem) { fileSystem = (CidFileSystem) newFileSystem(uri, DataConfig.asMap()) } return fileSystem @@ -112,72 +115,96 @@ class CidFileSystemProvider extends FileSystemProvider { @Override InputStream newInputStream(Path path, OpenOption... options) throws IOException { final cid = toCidPath(path) + if (cid instanceof CidMetadataPath) + return (cid as CidMetadataPath).newInputStream() + return newInputStream0(cid, options) + } + + private static InputStream newInputStream0(CidPath cid, OpenOption... options) throws IOException { final realPath = cid.getTargetPath(true) if (realPath instanceof CidMetadataPath) return (realPath as CidMetadataPath).newInputStream() - else - return realPath.fileSystem.provider().newInputStream(realPath, options) + return realPath.fileSystem.provider().newInputStream(realPath, options) } @Override SeekableByteChannel newByteChannel(Path path, Set options, FileAttribute... 
attrs) throws IOException { final cid = toCidPath(path) - if (options.size() > 0) { - for (OpenOption opt: options) { - // All OpenOption values except for APPEND and WRITE are allowed - if (opt == StandardOpenOption.APPEND || opt == StandardOpenOption.WRITE) - throw new UnsupportedOperationException("'$opt' not allowed"); - } - } - final realPath = cid.getTargetPath(true) + validateOptions(options) + return newByteChannel0(cid, options, attrs) + } + + + private class CidPathSeekableByteChannel implements SeekableByteChannel { SeekableByteChannel channel - if (realPath instanceof CidMetadataPath){ - channel = (realPath as CidMetadataPath).newSeekableByteChannel() - } else { - channel = realPath.fileSystem.provider().newByteChannel(realPath, options, attrs) + + CidPathSeekableByteChannel(SeekableByteChannel channel) { + this.channel = channel } - new SeekableByteChannel() { + @Override + int read(ByteBuffer dst) throws IOException { + return channel.read(dst) + } - @Override - int read(ByteBuffer dst) throws IOException { - return channel.read(dst) - } + @Override + int write(ByteBuffer src) throws IOException { + throw new NonWritableChannelException(){} + } - @Override - int write(ByteBuffer src) throws IOException { - throw new UnsupportedOperationException("Write operation not supported") - } + @Override + long position() throws IOException { + return channel.position() + } - @Override - long position() throws IOException { - return channel.position() - } + @Override + SeekableByteChannel position(long newPosition) throws IOException { + channel.position(newPosition) + return this + } - @Override - SeekableByteChannel position(long newPosition) throws IOException { - throw new UnsupportedOperationException("Position operation not supported") - } + @Override + long size() throws IOException { + return channel.size() + } - @Override - long size() throws IOException { - return channel.size() - } + @Override + SeekableByteChannel truncate(long unused) throws 
IOException { + throw new NonWritableChannelException() + } - @Override - SeekableByteChannel truncate(long unused) throws IOException { - throw new UnsupportedOperationException("Truncate operation not supported") - } + @Override + boolean isOpen() { + return channel.isOpen() + } - @Override - boolean isOpen() { - return channel.isOpen() - } + @Override + void close() throws IOException { + channel.close() + } + } - @Override - void close() throws IOException { - channel.close() - } + private static void validateOptions(Set options) { + if (!options || options.empty) + return + for (OpenOption opt : options) { + // All OpenOption values except for APPEND and WRITE are allowed + if (opt == StandardOpenOption.APPEND || opt == StandardOpenOption.WRITE) + throw new UnsupportedOperationException("'$opt' not allowed"); + } + + } + + private SeekableByteChannel newByteChannel0(CidPath cid, Set options, FileAttribute... attrs) { + if (cid instanceof CidMetadataPath) { + return (cid as CidMetadataPath).newSeekableByteChannel() + } + final realPath = cid.getTargetPath(true) + if (realPath instanceof CidMetadataPath) { + return (realPath as CidMetadataPath).newSeekableByteChannel() + } else { + SeekableByteChannel channel = realPath.fileSystem.provider().newByteChannel(realPath, options, attrs) + return new CidPathSeekableByteChannel(channel) } } @@ -186,9 +213,9 @@ class CidFileSystemProvider extends FileSystemProvider { final cid = toCidPath(path) final real = cid.getTargetPath(false) final stream = real - .getFileSystem() - .provider() - .newDirectoryStream(real, new CidFilter(fileSystem)) + .getFileSystem() + .provider() + .newDirectoryStream(real, new CidFilter(fileSystem)) return new DirectoryStream() { @@ -203,11 +230,12 @@ class CidFileSystemProvider extends FileSystemProvider { } } } + private class CidFilter implements DirectoryStream.Filter { private final CidFileSystem fs - CidFilter(CidFileSystem fs){ + CidFilter(CidFileSystem fs) { this.fs = fs } @@ -217,9 
+245,9 @@ class CidFileSystemProvider extends FileSystemProvider { } } - private static CidPath fromRealToCidPath(Path toConvert, Path realBase, CidPath cidBase){ + private static CidPath fromRealToCidPath(Path toConvert, Path realBase, CidPath cidBase) { if (toConvert.isAbsolute()) { - if (toConvert.class != realBase.class){ + if (toConvert.class != realBase.class) { throw new ProviderMismatchException() } final relative = realBase.relativize(toConvert) @@ -292,19 +320,29 @@ class CidFileSystemProvider extends FileSystemProvider { @Override void checkAccess(Path path, AccessMode... modes) throws IOException { + validateAccessModes(modes) final cid = toCidPath(path) - for( AccessMode m : modes ) { - if( m == AccessMode.WRITE ) - throw new AccessDeniedException("Write mode not supported") - if( m == AccessMode.EXECUTE ) - throw new AccessDeniedException("Execute mode not supported") - } + if (cid instanceof CidMetadataPath) + return + checkAccess0(cid, modes) + } + + private void checkAccess0(CidPath cid, AccessMode... modes) { final real = cid.getTargetPath(true) if (real instanceof CidMetadataPath) return real.fileSystem.provider().checkAccess(real, modes) } + private void validateAccessModes(AccessMode... modes) { + for (AccessMode m : modes) { + if (m == AccessMode.WRITE) + throw new AccessDeniedException("Write mode not supported") + if (m == AccessMode.EXECUTE) + throw new AccessDeniedException("Execute mode not supported") + } + } + @Override V getFileAttributeView(Path path, Class type, LinkOption... options) { return null @@ -313,11 +351,16 @@ class CidFileSystemProvider extends FileSystemProvider { @Override A readAttributes(Path path, Class type, LinkOption... options) throws IOException { final cid = toCidPath(path) + if (cid instanceof CidMetadataPath) + return (cid as CidMetadataPath).readAttributes(type) + readAttributes0(cid, type, options) + } + + private A readAttributes0(CidPath cid, Class type, LinkOption... 
options) throws IOException { final real = cid.getTargetPath(true) if (real instanceof CidMetadataPath) return (real as CidMetadataPath).readAttributes(type) - else - return real.fileSystem.provider().readAttributes(real,type,options) + return real.fileSystem.provider().readAttributes(real, type, options) } @Override diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy index 4364eb97ca..49e0d3f7aa 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy @@ -18,6 +18,7 @@ package nextflow.data.cid.fs import groovy.util.logging.Slf4j +import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataOutput import nextflow.data.cid.serde.CidSerializable import nextflow.file.RealPathAware @@ -113,23 +114,29 @@ class CidPath implements Path, RealPathAware { return first } - private static void validateHash(DataOutput cidObject) { + protected static void validateDataOutput(DataOutput cidObject) { final hashedPath = FileHelper.toCanonicalPath(cidObject.path as String) if( !hashedPath.exists() ) throw new FileNotFoundException("Target path $cidObject.path does not exists.") - if( cidObject.checksum ) { - final checksum = cidObject.checksum - if( checksum.algorithm in SUPPORTED_CHECKSUM_ALGORITHMS ){ - - final hash = checksum.mode - ? CacheHelper.hasher(hashedPath,CacheHelper.HashMode.of(checksum.mode.toString().toLowerCase())).hash().toString() - : CacheHelper.hasher(hashedPath).hash().toString() - if( hash != checksum.value ) - log.warn("Checksum of $hashedPath does not match with the one stored in the metadata") - } else { - log.warn("Checksum of $hashedPath can not be validated. Algorithm ${checksum.algorithm} is not supported") - } + validateChecksum(cidObject.checksum, hashedPath) + } + + protected static void validateChecksum(Checksum checksum, Path hashedPath) { + if( !checksum) + return + if( ! 
isAlgorithmSupported(checksum.algorithm) ) { + log.warn("Checksum of '$hashedPath' can't be validated. Algorithm '${checksum.algorithm}' is not supported") + return } + final hash = checksum.mode + ? CacheHelper.hasher(hashedPath, CacheHelper.HashMode.of(checksum.mode.toString().toLowerCase())).hash().toString() + : CacheHelper.hasher(hashedPath).hash().toString() + if (hash != checksum.value) + log.warn("Checksum of '$hashedPath' does not match with the one stored in the metadata") + } + + protected static isAlgorithmSupported( String algorithm ){ + return algorithm && algorithm in SUPPORTED_CHECKSUM_ALGORITHMS } @TestOnly @@ -167,18 +174,19 @@ class CidPath implements Path, RealPathAware { return findTarget(fs, parent.toString(), false, newChildren as String[]) } } - throw new FileNotFoundException("Target path $filePath does not exists.") + throw new FileNotFoundException("Target path '$filePath' does not exists.") } protected static Path getMetadataAsTargetPath(CidSerializable results, CidFileSystem fs, String filePath, String[] children){ - if( results ) { - if( children && children.size() > 0 ) { - return getSubObjectAsPath(fs, filePath, results, children) - }else { - return generateCidMetadataPath(fs, filePath, results, children) - } + if( !results ) { + throw new FileNotFoundException("Target path '$filePath' does not exist.") + } + if (children && children.size() > 0) { + return getSubObjectAsPath(fs, filePath, results, children) + } else { + return generateCidMetadataPath(fs, filePath, results, children) } - throw new FileNotFoundException("Target path $filePath does not exists.") + } /** @@ -195,10 +203,10 @@ class CidPath implements Path, RealPathAware { if( isSearchingOutputs(object, children) ) { // When asking for a Workflow or task output retrieve the outputs description final outputs = fs.cidStore.load("${key}/outputs") - if( outputs ) { - return generateCidMetadataPath(fs, key, outputs, children) - } else - throw new FileNotFoundException("Target 
path $key#outputs does not exists.") + if( !outputs ) { + throw new FileNotFoundException("Target path '$key#outputs' does not exist.") + } + return generateCidMetadataPath(fs, key, outputs, children) } else { return generateCidMetadataPath(fs, key, object, children) } @@ -207,21 +215,21 @@ class CidPath implements Path, RealPathAware { private static CidMetadataPath generateCidMetadataPath(CidFileSystem fs, String key, Object object, String[] children){ def creationTime = FileTime.from(navigate(object, 'createdAt') as Instant ?: Instant.now()) final output = children ? navigate(object, children.join('.')) : object - if( output ){ - return new CidMetadataPath(encodeSearchOutputs(output, true), creationTime, fs, key, children) + if( !output ) { + throw new FileNotFoundException("Target path '$key#${children.join('.')}' does not exist.") } - throw new FileNotFoundException("Target path $key#${children.join('.')} does not exists.") + return new CidMetadataPath(encodeSearchOutputs(output, true), creationTime, fs, key, children) } private static Path getTargetPathFromOutput(DataOutput object, String[] children) { final cidObject = object as DataOutput // return the real path stored in the metadata - validateHash(cidObject) + validateDataOutput(cidObject) def realPath = FileHelper.toCanonicalPath(cidObject.path as String) if (children && children.size() > 0) realPath = realPath.resolve(children.join(SEPARATOR)) if (!realPath.exists()) - throw new FileNotFoundException("Target path $realPath does not exists.") + throw new FileNotFoundException("Target path '$realPath' does not exist.") return realPath } @@ -257,7 +265,7 @@ class CidPath implements Path, RealPathAware { if( !path || path==SEPARATOR) return "" //Remove repeated elements - path = Path.of(path).normalize().toString() + path = Path.of(path.trim()).normalize().toString() //Remove initial and final separators if( path.startsWith(SEPARATOR) ) path = path.substring(1) diff --git 
a/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy index a2b6d24554..f455dee6a2 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy @@ -31,12 +31,43 @@ import java.time.Instant @Canonical @CompileStatic class DataOutput implements CidSerializable { + /** + * Real path of the output data. + */ String path + /** + * Checksum of the output data. + */ Checksum checksum + /** + * Entity that generated the data. Possible entities are: + * - a DataOutput if the workflow published from a task data. + * - a TaskRun if the data is a task output. + * - a WorkflowRun if the data is generated by the workflow (e.g., an index file). + */ String source - String run + /** + * Reference to the WorkflowRun that generated the data. + */ + String workflowRun + /** + * Reference to the task that generated the data. + */ + String taskRun + /** + * Size of the data. + */ long size + /** + * Data creation date. + */ Instant createdAt + /** + * Data last modified date. + */ Instant modifiedAt + /** + * Annotations attached to the data + */ Map annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy index 38068da0e4..45a2462d6a 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy @@ -28,6 +28,12 @@ import groovy.transform.CompileStatic @Canonical @CompileStatic class DataPath { + /** + * Real path of the output data. + */ String path + /** + * Checksum of the output data. 
+ */ Checksum checksum } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy index a80cf11b64..ebf6a6522b 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy @@ -31,9 +31,24 @@ import java.time.Instant @Canonical @CompileStatic class TaskOutputs implements CidSerializable { + /** + * Reference to the task that generated the data. + */ String taskRun + /** + * Reference to the WorkflowRun that generated the data. + */ String workflowRun + /** + * Creation date of this task outputs description + */ Instant createdAt + /** + * Outputs of the task + */ List outputs + /** + * Annotations attached to the task outputs + */ Map annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy index 6711d9616c..b20094cd06 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy @@ -29,17 +29,56 @@ import nextflow.data.cid.serde.CidSerializable @Canonical @CompileStatic class TaskRun implements CidSerializable { + /** + * Execution session identifier + */ String sessionId + /** + * Task name + */ String name + /** + * Checksum of the task source code + */ Checksum codeChecksum + /** + * Checksum of the task script + */ Checksum scriptChecksum + /** + * Task run inputs + */ List inputs + /** + * Container used for the task run + */ String container + /** + * Conda environment used for the task run + */ String conda + /** + * Spack environment used for the task run + */ String spack + /** + * Architecture defined in the Spack environment used for the task run + */ String architecture + /** + * Global variables defined in the task run + */ Map globalVars + /** + * Binaries used in the task run + */ List 
binEntries + /** + * Workflow run associated to the task run + */ String workflowRun + /** + * Annotations attached to the task run + */ Map annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy index 08c11cacc5..90947d6478 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy @@ -30,7 +30,16 @@ import nextflow.data.cid.serde.CidSerializable @Canonical @CompileStatic class Workflow implements CidSerializable { + /** + * List of script files defining a workflow + */ List scriptFiles + /** + * Workflow repository + */ String repository + /** + * Workflow commit identifier + */ String commitId } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy index fa9799d084..eaad598136 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy @@ -31,8 +31,20 @@ import java.time.Instant @Canonical @CompileStatic class WorkflowOutputs implements CidSerializable { + /** + * Creation date of the workflow outputs description + */ Instant createdAt + /** + * Workflow run that generated the outputs + */ String workflowRun + /** + * Workflow outputs + */ Map outputs + /** + * Annotations attached to the workflow outputs + */ Map annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy index 87f401d7e9..ab943cf18b 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy @@ -29,9 +29,24 @@ import nextflow.data.cid.serde.CidSerializable @Canonical @CompileStatic class WorkflowRun implements 
CidSerializable { + /** + * Description of the workflow associated to the workflow run. + */ Workflow workflow + /** + * Session identifier used in the workflow run + */ String sessionId + /** + * Workflow run name + */ String name + /** + * Workflow parameters + */ List params + /** + * Annotations attached to the workflow run + */ Map annotations } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index 77697dc532..f0f228984d 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -17,6 +17,18 @@ package nextflow.data.cid +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.TaskOutputs +import nextflow.file.FileHolder +import nextflow.processor.TaskHandler +import nextflow.script.TokenVar +import nextflow.script.params.FileInParam +import nextflow.script.params.FileOutParam +import nextflow.script.params.InParam +import nextflow.script.params.OutParam +import nextflow.script.params.ValueInParam +import nextflow.script.params.ValueOutParam + import static nextflow.data.cid.fs.CidPath.* import java.nio.file.Files @@ -49,7 +61,37 @@ import spock.lang.Unroll * @author Paolo Di Tommaso */ class CidObserverTest extends Specification { + def 'should normalize paths' (){ + given: + def folder = Files.createTempDirectory('test') + def workDir = folder.resolve("workDir") + def projectDir = folder.resolve("projectDir") + def metadata = Mock(WorkflowMetadata){ + getRepository() >> "https://nextflow.io/nf-test/" + getCommitId() >> "123456" + getScriptId() >> "78910" + getProjectDir() >> projectDir + getWorkDir() >> workDir + } + def params = [path: workDir.resolve("path/file.txt"), sequence: projectDir.resolve("file2.txt").toString(), value: 12] + when: + def results = CidObserver.getNormalizedParams(params, new PathNormalizer(metadata)) + then: + results.size() == 3 
+ results.get(0).name == "path" + results.get(0).type == Path.simpleName + results.get(0).value == "work/path/file.txt" + results.get(1).name == "sequence" + results.get(1).type == "String" + results.get(1).value == projectDir.resolve("file2.txt").toString() + results.get(2).name == "value" + results.get(2).type == "Integer" + results.get(2).value == 12 + cleanup: + ScriptMeta.reset() + folder?.deleteDir() + } def 'should collect script files' () { given: def folder = Files.createTempDirectory('test') @@ -81,7 +123,7 @@ class CidObserverTest extends Specification { def observer = Spy(new CidObserver(session, store)) when: - def files = observer.collectScriptDataPaths() + def files = observer.collectScriptDataPaths(new PathNormalizer(metadata)) then: observer.allScriptFiles() >> [ scriptFile, module1, module2 ] and: @@ -142,23 +184,68 @@ class CidObserverTest extends Specification { given: def folder = Files.createTempDirectory('test') def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] - def store = new DefaultCidStore(); def uniqueId = UUID.randomUUID() + def workDir = folder.resolve("work") def session = Mock(Session) { getConfig()>>config getUniqueId()>>uniqueId getRunName()>>"test_run" + getWorkDir() >> workDir + } + def metadata = Mock(WorkflowMetadata){ + getRepository() >> "https://nextflow.io/nf-test/" + getCommitId() >> "123456" + getScriptId() >> "78910" + getProjectDir() >> folder.resolve("projectDir") + getWorkDir() >> workDir } + and: + def store = new DefaultCidStore(); store.open(DataConfig.create(session)) + and: def observer = new CidObserver(session, store) + def normalizer = new PathNormalizer(metadata) observer.executionHash = "hash" + observer.normalizer = normalizer and: - def hash = HashCode.fromInt(123456789) + def hash = HashCode.fromString("1234567890") + def taskWd = workDir.resolve('12/34567890') + Files.createDirectories(taskWd) and: def processor = Mock(TaskProcessor){ getTaskGlobalVars(_) >> [:] 
getTaskBinEntries(_) >> [] } + + and: 'Task Inputs' + def inputs = new LinkedHashMap() + // File from task + inputs.put(new FileInParam(null, []).bind("file1"), [new FileHolder(workDir.resolve('78/567890/file1.txt'))]) + // Normal file + def file = folder.resolve("file2.txt") + file.text = "this is a test file" + def fileHash = CacheHelper.hasher(file).hash().toString() + inputs.put(new FileInParam(null, []).bind("file2"), [new FileHolder(file)]) + //Value input + inputs.put(new ValueInParam(null, []).bind("id"), "value") + + and: 'Task Outputs' + def outputs = new LinkedHashMap() + // Single Path output + def outFile1 = taskWd.resolve('fileOut1.txt') + outFile1.text = 'some data' + def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() + def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes) + outputs.put(new FileOutParam(null, []).bind(new TokenVar("file1")), outFile1) + // Collection Path output + def outFile2 = taskWd.resolve('fileOut2.txt') + outFile2.text = 'some other data' + def fileHash2 = CacheHelper.hasher(outFile2).hash().toString() + def attrs2 = Files.readAttributes(outFile2, BasicFileAttributes) + outputs.put(new FileOutParam(null, []).bind(new TokenVar("file2")), [outFile2]) + outputs.put(new ValueOutParam(null, []).bind(new TokenVar("id")), "value") + + and: 'Task description' def task = Mock(TaskRun) { getId() >> TaskId.of(100) getName() >> 'foo' @@ -166,27 +253,58 @@ class CidObserverTest extends Specification { getProcessor() >> processor getSource() >> 'echo task source' getScript() >> 'this is the script' + getInputs() >> inputs + getOutputs() >> outputs + getWorkDir() >> taskWd + } + def handler = Mock(TaskHandler){ + getTask() >> task } + + and: 'Expected CID objects' def sourceHash = CacheHelper.hasher('echo task source').hash().toString() def scriptHash = CacheHelper.hasher('this is the script').hash().toString() - def normalizer = Mock(PathNormalizer.class) { - normalizePath( _ as Path) >> {Path p -> p?.toString()} - 
normalizePath( _ as String) >> {String p -> p} - } def taskDescription = new nextflow.data.cid.model.TaskRun(uniqueId.toString(), "foo", new Checksum(sourceHash, "nextflow", "standard"), new Checksum(scriptHash, "nextflow", "standard"), - null, null, null, null, null, [:], [], "cid://hash", null) + [ + new Parameter(FileInParam.simpleName, "file1", ['cid://78567890/outputs/file1.txt']), + new Parameter(FileInParam.simpleName, "file2", [[path: normalizer.normalizePath(file), checksum: [value:fileHash, algorithm: "nextflow", mode: "standard"]]]), + new Parameter(ValueInParam.simpleName, "id", "value") + ], null, null, null, null, [:], [], "cid://hash", null) + def dataOutput1 = new DataOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), + "cid://1234567890", "cid://hash", "cid://1234567890", attrs1.size(), CidUtils.toDate(attrs1.creationTime()), CidUtils.toDate(attrs1.lastModifiedTime()) ) + def dataOutput2 = new DataOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), + "cid://1234567890", "cid://hash", "cid://1234567890", attrs2.size(), CidUtils.toDate(attrs2.creationTime()), CidUtils.toDate(attrs2.lastModifiedTime()) ) + when: - observer.storeTaskRun(task, normalizer) + observer.onProcessComplete(handler, null ) + def taskRunResult = store.load("${hash.toString()}") + def dataOutputResult1 = store.load("${hash}/outputs/fileOut1.txt") as DataOutput + def dataOutputResult2 = store.load("${hash}/outputs/fileOut2.txt") as DataOutput + def taskOutputsResult = store.load("${hash}/outputs") as TaskOutputs then: - folder.resolve(".meta/${hash.toString()}/.data.json").text == new CidEncoder().encode(taskDescription) + taskRunResult == taskDescription + dataOutputResult1 == dataOutput1 + dataOutputResult2 == dataOutput2 + taskOutputsResult.taskRun == "cid://1234567890" + taskOutputsResult.workflowRun == "cid://hash" + taskOutputsResult.outputs.size() == 3 + taskOutputsResult.outputs.get(0).type == FileOutParam.simpleName + 
taskOutputsResult.outputs.get(0).name == "file1" + taskOutputsResult.outputs.get(0).value == "cid://1234567890/outputs/fileOut1.txt" + taskOutputsResult.outputs.get(1).type == FileOutParam.simpleName + taskOutputsResult.outputs.get(1).name == "file2" + taskOutputsResult.outputs.get(1).value == ["cid://1234567890/outputs/fileOut2.txt"] + taskOutputsResult.outputs.get(2).type == ValueOutParam.simpleName + taskOutputsResult.outputs.get(2).name == "id" + taskOutputsResult.outputs.get(2).value == "value" cleanup: folder?.deleteDir() } - def 'should save task output' () { + def 'should save task data output' () { given: def folder = Files.createTempDirectory('test') def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] @@ -196,6 +314,7 @@ class CidObserverTest extends Specification { } store.open(DataConfig.create(session)) def observer = Spy(new CidObserver(session, store)) + observer.executionHash = "hash" and: def workDir = folder.resolve('12/34567890') Files.createDirectories(workDir) @@ -216,7 +335,7 @@ class CidObserverTest extends Specification { and: def attrs = Files.readAttributes(outFile, BasicFileAttributes) def output = new DataOutput(outFile.toString(), new Checksum(fileHash, "nextflow", "standard"), - "cid://15cd5b07", "cid://15cd5b07", attrs.size(), CidUtils.toDate(attrs.creationTime()), CidUtils.toDate(attrs.lastModifiedTime()) ) + "cid://15cd5b07", "cid://hash", "cid://15cd5b07", attrs.size(), CidUtils.toDate(attrs.creationTime()), CidUtils.toDate(attrs.lastModifiedTime()) ) and: observer.readAttributes(outFile) >> attrs @@ -384,7 +503,7 @@ class CidObserverTest extends Specification { def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes) def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() def output1 = new DataOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), - "cid://123987/outputs/file.bam", "$CID_PROT${observer.executionHash}", + "cid://123987/outputs/file.bam", 
"$CID_PROT${observer.executionHash}", null, attrs1.size(), CidUtils.toDate(attrs1.creationTime()), CidUtils.toDate(attrs1.lastModifiedTime()) ) folder.resolve(".meta/${observer.executionHash}/outputs/foo/file.bam/.data.json").text == encoder.encode(output1) @@ -398,7 +517,7 @@ class CidObserverTest extends Specification { observer.onWorkflowPublish("b", outFile2) then: 'Check outFile2 metadata in cid store' def output2 = new DataOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), - "cid://${observer.executionHash}" , "cid://${observer.executionHash}", + "cid://${observer.executionHash}" , "cid://${observer.executionHash}", null, attrs2.size(), CidUtils.toDate(attrs2.creationTime()), CidUtils.toDate(attrs2.lastModifiedTime()) ) folder.resolve(".meta/${observer.executionHash}/outputs/foo/file2.bam/.data.json").text == encoder.encode(output2) diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy new file mode 100644 index 0000000000..e5cf7f74ec --- /dev/null +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy @@ -0,0 +1,40 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package nextflow.data.cid + +import spock.lang.Specification + +/** + * @author Jorge Ejarque + */ +class CidPropertyValidationTest extends Specification{ + + def 'should throw exception when property does not exist'(){ + when: + new CidPropertyValidator().validate(['value','not_existing'] as String[]) + then: + def e = thrown(IllegalArgumentException) + e.message == "Property 'not_existing' doesn't exist in the CID model" + } + + def 'should not throw exception when property exist'(){ + when: + new CidPropertyValidator().validate(['value', 'outputs'] as String[]) + then: + noExceptionThrown() + } +} diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy index b71accdf3b..b127a5ee9d 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy @@ -4,6 +4,7 @@ import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.Parameter import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowOutputs import nextflow.data.cid.model.WorkflowRun import nextflow.data.config.DataConfig import spock.lang.Specification @@ -23,7 +24,7 @@ class CidUtilsTest extends Specification{ def setup() { storeLocation = tempDir.resolve("store") - def configMap = [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]] + def configMap = [enabled: true, store: [location: storeLocation.toString()]] config = new DataConfig(configMap) } @@ -53,10 +54,12 @@ class CidUtilsTest extends Specification{ def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) - + def outputs1 = new 
WorkflowOutputs(Instant.now(), "cid://testKey", [output: "name"] ) def cidStore = new DefaultCidStore() cidStore.open(config) cidStore.save(key, value1) + cidStore.save("$key/outputs", outputs1) + when: List params = CidUtils.query(cidStore, new URI('cid://testKey#params')) then: @@ -64,6 +67,18 @@ class CidUtilsTest extends Specification{ params[0] instanceof List (params[0] as List).size() == 2 + when: + List outputs = CidUtils.query(cidStore, new URI('cid://testKey#outputs')) + then: + outputs.size() == 1 + outputs[0] instanceof Map + outputs[0]['output'] == "name" + + expect: + CidUtils.query(cidStore, new URI('cid://testKey#no-exist')) == [] + CidUtils.query(cidStore, new URI('cid://testKey#outputs.no-exist')) == [] + CidUtils.query(cidStore, new URI('cid://no-exist#something')) == [] + } def "should parse children elements form Fragment string"() { @@ -71,11 +86,11 @@ class CidUtilsTest extends Specification{ CidUtils.parseChildrenFormFragment(FRAGMENT) == EXPECTED as String[] where: - FRAGMENT | EXPECTED - "field1" | ["field1"] - "field1.field2" | ["field1", "field2"] - null | [] - "" | [] + FRAGMENT | EXPECTED + "workflow" | ["workflow"] + "workflow.repository" | ["workflow", "repository"] + null | [] + "" | [] } def "should parse a query string as Map"() { @@ -84,25 +99,40 @@ class CidUtilsTest extends Specification{ where: QUERY_STRING | EXPECTED - "key1=value1&key2=value2" | ["key1": "value1", "key2": "value2"] - "key=val with space" | ["key": "val with space"] + "type=value1&taskRun=value2" | ["type": "value1", "taskRun": "value2"] + "type=val with space" | ["type": "val with space"] "" | [:] null | [:] } def "should check params in an object"() { given: - def obj = ["field": "value", "nested": ["subfield": "subvalue"]] + def obj = [ "type": "value", "workflow": ["repository": "subvalue"], "outputs" : [ ["path":"/to/file"],["path":"file2"] ] ] expect: CidUtils.checkParams(obj, PARAMS) == EXPECTED where: PARAMS | EXPECTED - ["field": "value"] | true - 
["field": "wrong"] | false - ["nested.subfield": "subvalue"] | true - ["nested.subfield": "wrong"] | false + ["type": "value"] | true + ["type": "wrong"] | false + ["workflow.repository": "subvalue"] | true + ["workflow.repository": "wrong"] | false + ["outputs.path": "wrong"] | false + ["outputs.path": "/to/file"] | true + ["outputs.path": "file2"] | true + + } + + def 'should parse query' (){ + expect: + CidUtils.parseQuery(PARAMS) == EXPECTED + where: + PARAMS | EXPECTED + "type=value" | ["type": "value"] + "workflow.repository=subvalue" | ["workflow.repository": "subvalue"] + "" | [:] + null | [:] } def "should navigate in object params"() { @@ -162,4 +192,20 @@ class CidUtilsTest extends Specification{ result == [new Parameter("String", "param1", "value1")] } + def 'should navigate' (){ + def uniqueId = UUID.randomUUID() + def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) + def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") + def wfRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", [key: "value1"]), new Parameter("String", "param2", "value2")]) + + expect: + CidUtils.navigate(wfRun, "workflow.commitId") == "123456" + CidUtils.navigate(wfRun, "params.name") == ["param1", "param2"] + CidUtils.navigate(wfRun, "params.value.key") == "value1" + CidUtils.navigate(wfRun, "params.value.no-exist") == null + CidUtils.navigate(wfRun, "params.no-exist") == null + CidUtils.navigate(wfRun, "no-exist") == null + CidUtils.navigate(null, "something") == null + } + } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryFileTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy similarity index 66% rename from modules/nf-cid/src/test/nextflow/data/cid/CidHistoryFileTest.groovy rename to modules/nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy index c8385bf9c4..a80a2aed98 100644 --- 
a/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryFileTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy @@ -26,36 +26,35 @@ import java.nio.file.Path * * @author Jorge Ejarque */ -class CidHistoryFileTest extends Specification { +class DefaultCidHistoryLogTest extends Specification { Path tempDir Path historyFile - CidHistoryFile cidHistoryFile + DefaultCidHistoryLog cidHistoryLog def setup() { tempDir = Files.createTempDirectory("wdir") - historyFile = tempDir.resolve("cid-history.txt") - Files.createFile(historyFile) - cidHistoryFile = new CidHistoryFile(historyFile) + historyFile = tempDir.resolve("cid-history") + cidHistoryLog = new DefaultCidHistoryLog(historyFile) } def cleanup(){ tempDir?.deleteDir() } - def "write should append a new record to the file"() { + def "write should add a new file to the history folder"() { given: UUID sessionId = UUID.randomUUID() String runName = "TestRun" String runCid = "cid://123" when: - cidHistoryFile.write(runName, sessionId, runCid) + cidHistoryLog.write(runName, sessionId, runCid) then: - def lines = Files.readAllLines(historyFile) - lines.size() == 1 - def parsedRecord = CidHistoryRecord.parse(lines[0]) + def files = historyFile.listFiles() + files.size() == 1 + def parsedRecord = CidHistoryRecord.parse(files[0].text) parsedRecord.sessionId == sessionId parsedRecord.runName == runName parsedRecord.runCid == runCid @@ -68,19 +67,19 @@ class CidHistoryFileTest extends Specification { String runCid = "cid://123" and: - cidHistoryFile.write(runName, sessionId, runCid) + cidHistoryLog.write(runName, sessionId, runCid) when: - def record = cidHistoryFile.getRecord(sessionId) + def record = cidHistoryLog.getRecord(sessionId) then: record.sessionId == sessionId record.runName == runName record.runCid == runCid } - def "should return null if session does not exist"() { + def "should return null and warn if session does not exist"() { expect: - 
cidHistoryFile.getRecord(UUID.randomUUID()) == null + cidHistoryLog.getRecord(UUID.randomUUID()) == null } def "update should modify existing Cid for given session"() { @@ -91,15 +90,15 @@ class CidHistoryFileTest extends Specification { String resultsCidUpdated = "results-cid-updated" and: - cidHistoryFile.write(runName, sessionId, 'run-cid-initial') + cidHistoryLog.write(runName, sessionId, 'run-cid-initial') when: - cidHistoryFile.updateRunCid(sessionId, runCidUpdated) + cidHistoryLog.updateRunCid(sessionId, runCidUpdated) then: - def lines = Files.readAllLines(historyFile) - lines.size() == 1 - def parsedRecord = CidHistoryRecord.parse(lines[0]) + def files = historyFile.listFiles() + files.size() == 1 + def parsedRecord = CidHistoryRecord.parse(files[0].text) parsedRecord.runCid == runCidUpdated } @@ -110,14 +109,14 @@ class CidHistoryFileTest extends Specification { String runName = "Run1" String runCid = "cid://123" and: - cidHistoryFile.write(runName, existingSessionId, runCid) + cidHistoryLog.write(runName, existingSessionId, runCid) when: - cidHistoryFile.updateRunCid(nonExistingSessionId, "new-cid") + cidHistoryLog.updateRunCid(nonExistingSessionId, "new-cid") then: - def lines = Files.readAllLines(historyFile) - lines.size() == 1 - def parsedRecord = CidHistoryRecord.parse(lines[0]) + def files = historyFile.listFiles() + files.size() == 1 + def parsedRecord = CidHistoryRecord.parse(files[0].text) parsedRecord.runCid == runCid } @@ -127,10 +126,10 @@ class CidHistoryFileTest extends Specification { String runName = "Run1" String runCid = "cid://123" and: - cidHistoryFile.write(runName, sessionId, runCid) + cidHistoryLog.write(runName, sessionId, runCid) when: - def records = cidHistoryFile.getRecords() + def records = cidHistoryLog.getRecords() then: records.size() == 1 records[0].sessionId == sessionId diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy 
index 450403002c..2c14d6edb7 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy @@ -61,13 +61,13 @@ class DefaultCidStoreTest extends Specification { then: cidStore.getMetadataPath() == metaLocation historyLog != null - historyLog instanceof CidHistoryFile + historyLog instanceof DefaultCidHistoryLog } def "save should store value in the correct file location"() { given: def key = "testKey" - def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://run", 1234) + def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://workflow", "cid://task", 1234) def cidStore = new DefaultCidStore() cidStore.open(config) @@ -83,7 +83,7 @@ class DefaultCidStoreTest extends Specification { def "load should retrieve stored value correctly"() { given: def key = "testKey" - def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://run", 1234) + def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://workflow", "cid://task", 1234) def cidStore = new DefaultCidStore() cidStore.open(config) cidStore.save(key, value) @@ -110,11 +110,11 @@ class DefaultCidStoreTest extends Specification { def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [ new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] ) def key2 = "testKey2" - def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", 1234, time, time, [key1:"value1", key2:"value2"]) + def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [key1:"value1", 
key2:"value2"]) def key3 = "testKey3" - def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", 1234, time, time, [key2:"value2", key3:"value3"]) + def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [key2:"value2", key3:"value3"]) def key4 = "testKey4" - def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", 1234, time, time, [key3:"value3", key4:"value4"]) + def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [key3:"value3", key4:"value4"]) def cidStore = new DefaultCidStore() cidStore.open(config) @@ -124,9 +124,12 @@ class DefaultCidStoreTest extends Specification { cidStore.save(key4, value4) when: - def results3 = cidStore.search("type=DataOutput&annotations.key2=value2") + def results = cidStore.search("type=DataOutput&annotations.key2=value2") then: - results3.size() == 2 + results.size() == 2 + results.keySet().containsAll([key2,key3]) + results[key2] == value2 + results[key3] == value3 } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy new file mode 100644 index 0000000000..2c8cc062e0 --- /dev/null +++ b/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy @@ -0,0 +1,450 @@ +package nextflow.data.cid.cli + +import nextflow.SysEnv +import nextflow.config.ConfigMap +import nextflow.dag.MermaidHtmlRenderer +import nextflow.data.cid.CidHistoryRecord +import nextflow.data.cid.CidStoreFactory +import nextflow.data.cid.DefaultCidHistoryLog +import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.DataOutput +import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.TaskRun +import 
nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowRun +import nextflow.data.cid.serde.CidEncoder +import nextflow.plugin.Plugins +import org.junit.Rule +import spock.lang.Specification +import spock.lang.TempDir +import test.OutputCapture +import java.nio.file.Files +import java.nio.file.Path +import java.time.Instant + +class CidCommandImplTest extends Specification{ + + @TempDir + Path tmpDir + + Path storeLocation + ConfigMap configMap + + def setup() { + // clear the environment to avoid the local env pollute the test env + SysEnv.push([:]) + storeLocation = tmpDir.resolve("store") + configMap = new ConfigMap([workflow:[ data: [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]]]]) + } + + def cleanup() { + Plugins.stop() + CidStoreFactory.reset() + SysEnv.pop() + } + + def setupSpec() { + CidStoreFactory.reset() + } + /* + * Read more http://mrhaki.blogspot.com.es/2015/02/spocklight-capture-and-assert-system.html + */ + @Rule + OutputCapture capture = new OutputCapture() + + def 'should print executions cids' (){ + given: + def historyFile = storeLocation.resolve(".meta/.history") + def cidLog = new DefaultCidHistoryLog(historyFile) + def uniqueId = UUID.randomUUID() + def date = new Date(); + def recordEntry = "${CidHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tcid://123456".toString() + cidLog.write("run_name", uniqueId, "cid://123456", date) + when: + new CidCommandImpl().log(configMap) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 2 + stdout[1] == recordEntry + } + + def 'should print no history' (){ + given: + def historyFile = storeLocation.resolve(".meta/.history") + Files.createDirectories(historyFile.parent) + + when: + new CidCommandImpl().log(configMap) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('WARN') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0] == "No workflow runs CIDs found." + + } + + def 'should show cid content' (){ + given: + def cidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(cidFile.parent) + def time = Instant.ofEpochMilli(123456789) + def encoder = new CidEncoder().withPrettyPrint(true) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam","cid://123987/", null, 1234, time, time, null) + def jsonSer = encoder.encode(entry) + def expectedOutput = jsonSer + cidFile.text = jsonSer + when: + new CidCommandImpl().show(configMap, ["cid://12345"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == expectedOutput.readLines().size() + stdout.join('\n') == expectedOutput + } + + def 'should warn if no cid content' (){ + given: + + when: + new CidCommandImpl().show(configMap, ["cid://12345"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? 
line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0] == "No entries found for cid://12345." + } + + def 'should get lineage cid content' (){ + given: + + def outputHtml = tmpDir.resolve('lineage.html') + + def cidFile = storeLocation.resolve(".meta/12345/file.bam/.data.json") + def cidFile2 = storeLocation.resolve(".meta/123987/file.bam/.data.json") + def cidFile3 = storeLocation.resolve(".meta/123987/.data.json") + def cidFile4 = storeLocation.resolve(".meta/45678/output.txt/.data.json") + def cidFile5 = storeLocation.resolve(".meta/45678/.data.json") + Files.createDirectories(cidFile.parent) + Files.createDirectories(cidFile2.parent) + Files.createDirectories(cidFile3.parent) + Files.createDirectories(cidFile4.parent) + Files.createDirectories(cidFile5.parent) + def encoder = new CidEncoder() + def time = Instant.ofEpochMilli(123456789) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", "cid://45678", null, 1234, time, time, null) + cidFile.text = encoder.encode(entry) + entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987", "cid://45678", "cid://123987", 1234, time, time, null) + cidFile2.text = encoder.encode(entry) + entry = new TaskRun("u345-2346-1stw2", "foo", + new Checksum("abcde2345","nextflow","standard"), + new Checksum("abfsc2375","nextflow","standard"), + [new Parameter( "ValueInParam", "sample_id","ggal_gut"), + new Parameter("FileInParam","reads",["cid://45678/output.txt"]), + new Parameter("FileInParam","input",[new DataPath("path/to/file",new Checksum("45372qe","nextflow","standard"))]) + ], + null, null, null, null, [:],[], null) + cidFile3.text = encoder.encode(entry) + entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://45678", "cid://45678", null, 1234, time, time, null) + cidFile4.text = encoder.encode(entry) + entry = 
new TaskRun("u345-2346-1stw2", "bar", + new Checksum("abfs2556","nextflow","standard"), + new Checksum("abfsc2375","nextflow","standard"), + null,null, null, null, null, [:],[], null) + cidFile5.text = encoder.encode(entry) + final network = """flowchart BT + cid://12345/file.bam@{shape: document, label: "cid://12345/file.bam"} + cid://123987/file.bam@{shape: document, label: "cid://123987/file.bam"} + cid://123987@{shape: process, label: "foo"} + ggal_gut@{shape: document, label: "ggal_gut"} + path/to/file@{shape: document, label: "path/to/file"} + cid://45678/output.txt@{shape: document, label: "cid://45678/output.txt"} + cid://45678@{shape: process, label: "bar"} + + cid://123987/file.bam -->cid://12345/file.bam + cid://123987 -->cid://123987/file.bam + ggal_gut -->cid://123987 + cid://45678/output.txt -->cid://123987 + path/to/file -->cid://123987 + cid://45678 -->cid://45678/output.txt +""" + final template = MermaidHtmlRenderer.readTemplate() + def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) + + when: + new CidCommandImpl().lineage(configMap, ["cid://12345/file.bam", outputHtml.toString()]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 1 + stdout[0] == "Linage graph for cid://12345/file.bam rendered in ${outputHtml}" + outputHtml.exists() + outputHtml.text == expectedOutput + } + + def 'should get lineage from workflow cid content' (){ + given: + + def outputHtml = tmpDir.resolve('lineage.html') + + def cidFile = storeLocation.resolve(".meta/12345/file.bam/.data.json") + def cidFile3 = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(cidFile.parent) + Files.createDirectories(cidFile3.parent) + def encoder = new CidEncoder() + def time = Instant.ofEpochMilli(123456789) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://12345", "cid://12345", null, 1234, time, time, null) + cidFile.text = encoder.encode(entry) + def wf = new Workflow([new DataPath("/path/to/main.nf)")], "hello-nf", "aasdklk") + entry = new WorkflowRun(wf,"sessionId","run_name", + [new Parameter( "String", "sample_id","ggal_gut"), + new Parameter("Integer","reads",2)]) + cidFile3.text = encoder.encode(entry) + final network = """flowchart BT + cid://12345/file.bam@{shape: document, label: "cid://12345/file.bam"} + cid://12345@{shape: processes, label: "run_name"} + ggal_gut@{shape: document, label: "ggal_gut"} + 2.0@{shape: document, label: "2.0"} + + cid://12345 -->cid://12345/file.bam + ggal_gut -->cid://12345 + 2.0 -->cid://12345 +""" + final template = MermaidHtmlRenderer.readTemplate() + def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) + + when: + new CidCommandImpl().lineage(configMap, ["cid://12345/file.bam", outputHtml.toString()]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 1 + stdout[0] == "Linage graph for cid://12345/file.bam rendered in ${outputHtml}" + outputHtml.exists() + outputHtml.text == expectedOutput + } + + def 'should show query results'(){ + given: + def cidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(cidFile.parent) + def encoder = new CidEncoder().withPrettyPrint(true) + def time = Instant.ofEpochMilli(123456789) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) + def jsonSer = encoder.encode(entry) + def expectedOutput = jsonSer + cidFile.text = jsonSer + when: + new CidCommandImpl().show(configMap, ["cid:///?type=DataOutput"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == expectedOutput.readLines().size() + stdout.join('\n') == expectedOutput + } + + def 'should show query with fragment'(){ + given: + def cidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(cidFile.parent) + def cidFile2 = storeLocation.resolve(".meta/67890/.data.json") + Files.createDirectories(cidFile2.parent) + def encoder = new CidEncoder().withPrettyPrint(true) + def time = Instant.ofEpochMilli(123456789) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) + def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), + "cid://123987/file2.bam", "cid://123987/", null, 1235, time, time, null) + def expectedOutput1 = '[\n "path/to/file",\n "path/to/file2"\n]' + def expectedOutput2 = '[\n "path/to/file2",\n "path/to/file"\n]' + cidFile.text = encoder.encode(entry) + cidFile2.text = encoder.encode(entry2) + when: + new CidCommandImpl().show(configMap, ["cid:///?type=DataOutput#path"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.join('\n') == expectedOutput1 || stdout.join('\n') == expectedOutput2 + } + + def 'should diff'(){ + given: + def cidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(cidFile.parent) + def cidFile2 = storeLocation.resolve(".meta/67890/.data.json") + Files.createDirectories(cidFile2.parent) + def encoder = new CidEncoder().withPrettyPrint(true) + def time = Instant.ofEpochMilli(123456789) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) + def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), + "cid://123987/file2.bam", "cid://123987/", null, 1235, time, time, null) + cidFile.text = encoder.encode(entry) + cidFile2.text = encoder.encode(entry2) + def expectedOutput = '''diff --git 12345 67890 +--- 12345 ++++ 67890 +@@ -1,15 +1,15 @@ + { + "type": "DataOutput", +- "path": "path/to/file", ++ "path": "path/to/file2", + "checksum": { +- "value": "45372qe", ++ "value": "42472qet", + "algorithm": "nextflow", + "mode": "standard" + }, +- "source": "cid://123987/file.bam", ++ "source": "cid://123987/file2.bam", + "workflowRun": "cid://123987/", + "taskRun": null, +- "size": 1234, ++ "size": 1235, + "createdAt": "1970-01-02T10:17:36.789Z", + "modifiedAt": "1970-01-02T10:17:36.789Z", + "annotations": null +''' + + when: + new CidCommandImpl().diff(configMap, ["cid://12345", "cid://67890"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.join('\n') == expectedOutput + } + + def 'should print error if no entry found diff'(){ + given: + def cidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(cidFile.parent) + def encoder = new CidEncoder().withPrettyPrint(true) + def time = Instant.ofEpochMilli(123456789) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) + cidFile.text = encoder.encode(entry) + + when: + new CidCommandImpl().diff(configMap, ["cid://89012", "cid://12345"]) + new CidCommandImpl().diff(configMap, ["cid://12345", "cid://67890"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 2 + stdout[0] == "No entry found for cid://89012." + stdout[1] == "No entry found for cid://67890." + } + + def 'should print error store is not found in diff'(){ + when: + def config = new ConfigMap() + new CidCommandImpl().log(config) + new CidCommandImpl().show(config,["cid:///?type=DataOutput"]) + new CidCommandImpl().lineage(config,["cid://12345", "output.html"]) + new CidCommandImpl().diff(config, ["cid://89012", "cid://12345"]) + + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + def expectedOutput = "Error CID store not loaded. Check Nextflow configuration." 
+ then: + stdout.size() == 4 + stdout[0] == expectedOutput + stdout[1] == expectedOutput + stdout[2] == expectedOutput + stdout[3] == expectedOutput + } + + def 'should find metadata descriptions'(){ + given: + def cidFile = storeLocation.resolve(".meta/123987/file.bam/.data.json") + Files.createDirectories(cidFile.parent) + def cidFile2 = storeLocation.resolve(".meta/123987/file2.bam/.data.json") + Files.createDirectories(cidFile2.parent) + def encoder = new CidEncoder().withPrettyPrint(true) + def time = Instant.ofEpochMilli(123456789) + def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) + def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), + "cid://123987/file2.bam", "cid://123987/", null, 1235, time, time, null) + def expectedOutput1 = '[\n "cid://123987/file.bam",\n "cid://123987/file2.bam"\n]' + def expectedOutput2 = '[\n "cid://123987/file2.bam",\n "cid://123987/file.bam"\n]' + cidFile.text = encoder.encode(entry) + cidFile2.text = encoder.encode(entry2) + when: + new CidCommandImpl().find(configMap, ["type=DataOutput"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.join('\n') == expectedOutput1 || stdout.join('\n') == expectedOutput2 + } + + + +} diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy index f7899528b4..90c603d0fe 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy @@ -21,6 +21,9 @@ import nextflow.data.cid.DefaultCidStore import spock.lang.Shared import java.nio.ByteBuffer +import java.nio.channels.NonWritableChannelException +import java.nio.file.AccessDeniedException +import java.nio.file.AccessMode import java.nio.file.FileSystemNotFoundException import java.nio.file.Files import java.nio.file.Path @@ -136,20 +139,105 @@ class CidFileSystemProviderTest extends Specification { def opts = Set.of(StandardOpenOption.READ) when: def channel = provider.newByteChannel(cid, opts) - and: + then: + channel.isOpen() + channel.position() == 0 + channel.size() == "Hello, World!".getBytes().size() + when: + channel.truncate(25) + then: + thrown(NonWritableChannelException) + + when: def buffer = ByteBuffer.allocate(1000); def read = channel.read(buffer) - channel.close() def bytes = new byte[read] buffer.get(0,bytes) then: bytes == "Hello, World!".getBytes() + when: + channel.position(2) + then: + channel.position() == 2 + + when: + channel.write(buffer) + then: + thrown(NonWritableChannelException) + + when: + provider.newByteChannel(cid, Set.of(StandardOpenOption.WRITE)) + then: + thrown(UnsupportedOperationException) + + when: + provider.newByteChannel(cid, Set.of(StandardOpenOption.APPEND)) + then: + thrown(UnsupportedOperationException) cleanup: + channel.close() outputMeta.deleteDir() output.delete() } + def 'should create new byte channel for CidMetadata' () { + given: + def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def 
outputMeta = meta.resolve("12345") + outputMeta.mkdirs() + outputMeta.resolve(".data.json").text = '{"type":"WorkflowRun","sessionId":"session","name":"run_name","params":[{"type":"String","name":"param1","value":"value1"}]}' + + Global.session = Mock(Session) { getConfig()>>config } + and: + def provider = new CidFileSystemProvider() + def cid = provider.getPath(CidPath.asUri('cid://12345#name')) + + when: + def channel = provider.newByteChannel(cid, Set.of(StandardOpenOption.READ)) + then: + channel.isOpen() + channel.position() == 0 + channel.size() == '"run_name"'.getBytes().size() + + when: + channel.truncate(25) + then: + thrown(NonWritableChannelException) + + when: + def buffer = ByteBuffer.allocate(1000); + def read = channel.read(buffer) + def bytes = new byte[read] + buffer.get(0,bytes) + then: + bytes =='"run_name"'.getBytes() + + when: + channel.position(2) + then: + channel.position() == 2 + + when: + channel.write(buffer) + then: + thrown(NonWritableChannelException) + + when: + provider.newByteChannel(cid, Set.of(StandardOpenOption.WRITE)) + then: + thrown(UnsupportedOperationException) + + when: + provider.newByteChannel(cid, Set.of(StandardOpenOption.APPEND)) + then: + thrown(UnsupportedOperationException) + + cleanup: + channel.close() + outputMeta.deleteDir() + } + def 'should read cid' () { given: def config = [workflow:[data:[store:[location:wdir.toString()]]]] @@ -355,5 +443,47 @@ class CidFileSystemProviderTest extends Specification { meta.resolve('12345').deleteDir() } + def 'should throw exception in unsupported methods'() { + given: + def provider = new CidFileSystemProvider() + + when: + provider.newOutputStream(null) + then: + thrown(UnsupportedOperationException) + + when: + provider.getFileStore(null) + then: + thrown(UnsupportedOperationException) + + when: + provider.readAttributes(null, "attrib") + then: + thrown(UnsupportedOperationException) + + when: + provider.setAttribute(null, "attrib", null) + then: + 
thrown(UnsupportedOperationException) + } + + def 'should throw exception when checking access mode'(){ + given: + def provider = new CidFileSystemProvider() + def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + + when: + provider.checkAccess(cid1, AccessMode.WRITE) + then: + def ex1 = thrown(AccessDeniedException) + ex1.message == "Write mode not supported" + + when: + provider.checkAccess(cid1, AccessMode.EXECUTE) + then: + def ex2 = thrown(AccessDeniedException) + ex2.message == "Execute mode not supported" + } } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy index 06bc689e07..cadaeb3386 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy @@ -18,11 +18,21 @@ package nextflow.data.cid.fs import nextflow.data.cid.CidUtils +import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.Workflow import nextflow.data.cid.model.WorkflowOutputs +import nextflow.data.cid.model.DataOutput +import nextflow.data.cid.model.WorkflowRun import nextflow.data.cid.serde.CidEncoder import nextflow.file.FileHelper +import nextflow.util.CacheHelper +import org.junit.Rule +import test.OutputCapture import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.ProviderMismatchException import java.time.Instant import spock.lang.Shared @@ -40,6 +50,9 @@ class CidPathTest extends Specification { @Shared def data = wdir.resolve('work') @Shared def fs = Mock(CidFileSystem) + @Rule + OutputCapture capture = new OutputCapture() + def cleanupSpec(){ wdir.deleteDir() } @@ -104,6 +117,8 @@ class CidPathTest extends Specification { fs | '1234' | ['/'] as String[] | '1234' null | '1234' | ['/'] as String[] | '1234' null | '../../a/b' | [] as String[] | '../../a/b' + fs | '1234/' | [] as String[] | '1234' + null | '1234/' | [] as 
String[] | '1234' } def 'should get target path' () { @@ -154,6 +169,11 @@ class CidPathTest extends Specification { then: thrown(FileNotFoundException) + when: 'CidPath subfolder of an output data description does not exist' + new CidPath(cidFs, '12345/output1/other/path').getTargetPath() + then: + thrown(FileNotFoundException) + when: 'Cid does not exist' new CidPath(cidFs, '23456').getTargetPath() then: @@ -181,6 +201,44 @@ class CidPathTest extends Specification { } + def 'should get subobjects as path' (){ + given: + def cidFs = new FileHelper().getOrCreateFileSystemFor('cid', [enabled: true, store: [location: cid.parent.toString()]] ) as CidFileSystem + def wf = new WorkflowRun(new Workflow([],"repo", "commit"), "sessionId", "runId", [new Parameter("String", "param1", "value1")]) + + when: 'workflow repo in workflow run' + Path p = CidPath.getMetadataAsTargetPath(wf, cidFs, "12345", ["workflow", "repository"] as String[]) + then: + p instanceof CidMetadataPath + p.text == '"repo"' + + when: 'outputs' + def outputs = new WorkflowOutputs(Instant.now(), "cid://12345", [ samples: ["sample1", "sample2"]]) + cidFs.cidStore.save("12345/outputs", outputs) + Path p2 = CidPath.getMetadataAsTargetPath(wf, cidFs, "12345", ["outputs"] as String[]) + then: + p2 instanceof CidMetadataPath + p2.text == CidUtils.encodeSearchOutputs([ samples: ["sample1", "sample2"]], true) + + when: 'child does not exists' + CidPath.getMetadataAsTargetPath(wf, cidFs, "12345", ["no-exist"] as String[]) + then: + def exception = thrown(FileNotFoundException) + exception.message == "Target path '12345#no-exist' does not exist." + + when: 'outputs does not exists' + CidPath.getMetadataAsTargetPath(wf, cidFs, "6789", ["outputs"] as String[]) + then: + def exception1 = thrown(FileNotFoundException) + exception1.message == "Target path '6789#outputs' does not exist." 
+ + when: 'null object' + CidPath.getMetadataAsTargetPath(null, cidFs, "12345", ["no-exist"] as String[]) + then: + def exception2 = thrown(FileNotFoundException) + exception2.message == "Target path '12345' does not exist." + } + def 'should get file name' () { when: def cid1 = new CidPath(fs, '1234567890/this/file.bam') @@ -368,7 +426,91 @@ class CidPathTest extends Specification { and: result == new CidPath(cidfs, '321') } - + + def 'should throw illegat exception when not correct scheme' (){ + when: 'creation' + new CidPath(fs, new URI("http://1234")) + then: + thrown(IllegalArgumentException) + + when: 'asUri' + CidPath.asUri("http://1234") + then: + thrown(IllegalArgumentException) + + when: 'asUri' + CidPath.asUri("") + then: + thrown(IllegalArgumentException) + + } + + def 'should throw provider mismatch exception when different path types' () { + given: + def pr = Mock(CidFileSystemProvider) + def fs = Mock(CidFileSystem){ + provider() >> pr} + and: + def cid = new CidPath(fs, '123/a/b/c') + + when: 'resolve with path' + cid.resolve(Path.of('d')) + then: + thrown(ProviderMismatchException) + + when: 'resolve with uri string' + cid.resolve(Path.of('http://1234')) + then: + thrown(ProviderMismatchException) + + when: 'relativize' + cid.relativize(Path.of('d')) + then: + thrown(ProviderMismatchException) + } + + def 'should throw exception for unsupported methods' () { + given: + def pr = Mock(CidFileSystemProvider) + def fs = Mock(CidFileSystem){ + provider() >> pr} + and: + def cid = new CidPath(fs, '123/a/b/c') + + when: 'to file' + cid.toFile() + then: + thrown(UnsupportedOperationException) + + when: 'register' + cid.register(null, null,null) + then: + thrown(UnsupportedOperationException) + } + + def 'should throw exception for incorrect index'() { + when: 'getting name with negative index' + new CidPath(fs, "1234").getName(-1) + then: + thrown(IllegalArgumentException) + + when: 'getting name with larger index tha namecount' + new CidPath(fs, 
"1234").getName(2) + then: + thrown(IllegalArgumentException) + + when: 'getting subpath with negative index' + new CidPath(fs, "1234").subpath(-1,1) + then: + thrown(IllegalArgumentException) + + when: 'getting subpath with larger index tha namecount' + new CidPath(fs, "1234").subpath(0,2) + then: + thrown(IllegalArgumentException) + + } + @Unroll def 'should get to uri string' () { expect: @@ -431,4 +573,96 @@ class CidPathTest extends Specification { 'cid://foo/bar' | true } + def 'should detect equals'(){ + expect: + new CidPath(FS1, PATH1).equals(new CidPath(FS2, PATH2)) == EXPECTED + where: + FS1 | FS2 | PATH1 | PATH2 | EXPECTED + null | fs | "12345/path" | "12345/path" | false + fs | null | "12345/path" | "12345/path" | false + null | null | "12345/" | "12345/path" | false + fs | fs | "12345/" | "12345/path" | false + and: + null | null | "12345/path" | "12345/path" | true + fs | fs | "12345/path" | "12345/path" | true + null | null | "12345/" | "12345" | true + fs | fs | "12345/" | "12345 " | true + } + + def 'should validate correct hash'(){ + when: + def file = wdir.resolve("file.txt") + file.text = "this is a data file" + def hash = CacheHelper.hasher(file).hash().toString() + def correctData = new DataOutput(file.toString(), new Checksum(hash,"nextflow", "standard")) + CidPath.validateDataOutput(correctData) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 0 + + cleanup: + file.delete() + } + + def 'should warn with incorrect hash'(){ + when: + def file = wdir.resolve("file.txt") + file.text = "this is a data file" + def hash = CacheHelper.hasher(file).hash().toString() + def correctData = new DataOutput(file.toString(), new Checksum("abscd","nextflow", "standard")) + CidPath.validateDataOutput(correctData) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0].endsWith("Checksum of '$file' does not match with the one stored in the metadata") + + cleanup: + file.delete() + } + + def 'should warn when hash algorithm is not supported'(){ + when: + def file = wdir.resolve("file.txt") + file.text = "this is a data file" + def hash = CacheHelper.hasher(file).hash().toString() + def correctData = new DataOutput(file.toString(), new Checksum(hash,"not-supported", "standard")) + CidPath.validateDataOutput(correctData) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0].endsWith("Checksum of '$file' can't be validated. 
Algorithm 'not-supported' is not supported") + + cleanup: + file.delete() + } + + def 'should throw exception when file not found validating hash'(){ + when: + def correctData = new DataOutput("not/existing/file", new Checksum("120741","nextflow", "standard")) + CidPath.validateDataOutput(correctData) + + then: + thrown(FileNotFoundException) + + } + + } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy index a504f66e8a..8217fc0420 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy @@ -19,7 +19,8 @@ class CidEncoderTest extends Specification{ given: def encoder = new CidEncoder() and: - def output = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://run", 1234) + def output = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), + "cid://source", "cid://workflow", "cid://task", 1234) when: def encoded = encoder.encode(output) diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy index 5856a10839..9586501699 100644 --- a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy +++ b/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy @@ -133,11 +133,11 @@ class H2CidStore implements CidStore { } @Override - List search(String queryString) { - final results= new LinkedList() + Map search(String queryString) { + final results= new HashMap() try(final sql=new Sql(dataSource)) { - sql.eachRow("SELECT metadata FROM cid_file WHERE JSON_MATCH(metadata, ?)", List.of(queryString)) { row -> - results.add(encoder.decode(toValue(row['metadata']) as String)) + sql.eachRow("SELECT path, metadata FROM cid_file WHERE JSON_MATCH(metadata, ?)", 
List.of(queryString)) { row -> + results.put(row['path'] as String, encoder.decode(toValue(row['metadata']) as String)) } } return results diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy index 8895d7762e..343cfca066 100644 --- a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy +++ b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy @@ -50,7 +50,7 @@ class H2CidStoreTest extends Specification { def 'should store and get a value' () { given: - def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://run", 1234) + def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://workflow", "cid//task", 1234) when: store.save('/some/key', value) then: @@ -66,11 +66,11 @@ class H2CidStoreTest extends Specification { def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) def key2 = "testKey2" - def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://run", 1234, time, time, [key1: "value1", key2: "value2"]) + def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [key1: "value1", key2: "value2"]) def key3 = "testKey3" - def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://run", 1234, time, time, [key2: "value2", key3: "value3"]) + def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [key2: "value2", key3: "value3"]) def key4 = "testKey4" - def value4 = new 
DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://run", 1234, time, time, [key3: "value3", key4: "value4"]) + def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [key3: "value3", key4: "value4"]) store.save(key, value1) store.save(key2, value2) From f1a87704c18076aa6985253cb3b5bf212d17368a Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Fri, 11 Apr 2025 15:44:53 -0400 Subject: [PATCH 40/72] Remove dot from message [ci fast] Signed-off-by: Paolo Di Tommaso --- .../nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy | 2 +- .../src/main/nextflow/data/cid/cli/CidCommandImpl.groovy | 4 ++-- .../nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy | 2 +- .../src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy index 7b8f5b7c30..7989157ff6 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -182,7 +182,7 @@ class CmdCidTest extends Specification { then: stdout.size() == 1 - stdout[0] == "No entries found for cid://12345." + stdout[0] == "No entries found for cid://12345" cleanup: folder?.deleteDir() diff --git a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy index bbda78a659..efd68e9992 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy @@ -100,7 +100,7 @@ class CidCommandImpl implements CmdCid.CidCommand { try { def entries = CidUtils.query(store, new URI(args[0])) if( !entries ) { - println "No entries found for ${args[0]}." 
+ println "No entries found for ${args[0]}" return } entries = entries.size() == 1 ? entries[0] : entries @@ -314,4 +314,4 @@ class CidCommandImpl implements CmdCid.CidCommand { println "Exception searching for ${args[0]}. ${e.message}" } } -} \ No newline at end of file +} diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index f0f228984d..790bde0aa5 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -279,7 +279,7 @@ class CidObserverTest extends Specification { when: observer.onProcessComplete(handler, null ) - def taskRunResult = store.load("${hash.toString()}") + def taskRunResult = store.load(hash.toString()) as nextflow.data.cid.model.TaskRun def dataOutputResult1 = store.load("${hash}/outputs/fileOut1.txt") as DataOutput def dataOutputResult2 = store.load("${hash}/outputs/fileOut2.txt") as DataOutput def taskOutputsResult = store.load("${hash}/outputs") as TaskOutputs diff --git a/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy index 2c8cc062e0..364abed3a4 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy @@ -135,7 +135,7 @@ class CidCommandImplTest extends Specification{ then: stdout.size() == 1 - stdout[0] == "No entries found for cid://12345." 
+ stdout[0] == "No entries found for cid://12345" } def 'should get lineage cid content' (){ From 10f2b87848afd7197a9d25bea323034e50493665 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Fri, 11 Apr 2025 16:10:33 -0400 Subject: [PATCH 41/72] Fix test on macOs [ci fast] Signed-off-by: Paolo Di Tommaso --- .../nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index 790bde0aa5..a655c9e57e 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -182,7 +182,7 @@ class CidObserverTest extends Specification { def 'should save task run' () { given: - def folder = Files.createTempDirectory('test') + def folder = Files.createTempDirectory('test').toRealPath() def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] def uniqueId = UUID.randomUUID() def workDir = folder.resolve("work") From 14ede8e628498ccb83b8bfdaf9fad22e7a5224be Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sun, 13 Apr 2025 18:11:12 -0400 Subject: [PATCH 42/72] Update copyright [ci fast] Signed-off-by: Paolo Di Tommaso --- modules/nf-cid/build.gradle | 2 +- .../main/nextflow/data/cid/CidHistoryLog.groovy | 5 ++--- .../nextflow/data/cid/CidHistoryRecord.groovy | 3 +-- .../main/nextflow/data/cid/CidObserver.groovy | 1 - .../nextflow/data/cid/CidObserverFactory.groovy | 1 - .../src/main/nextflow/data/cid/CidStore.groovy | 1 - .../nextflow/data/cid/CidStoreFactory.groovy | 3 +-- .../src/main/nextflow/data/cid/CidUtils.groovy | 1 - .../data/cid/DefaultCidHistoryLog.groovy | 3 +-- .../nextflow/data/cid/DefaultCidStore.groovy | 3 +-- .../data/cid/DefaultCidStoreFactory.groovy | 3 +-- .../nextflow/data/cid/cli/CidCommandImpl.groovy | 1 - 
.../nextflow/data/cid/fs/CidFileSystem.groovy | 3 +-- .../data/cid/fs/CidFileSystemProvider.groovy | 3 +-- .../nextflow/data/cid/fs/CidMetadataPath.groovy | 3 +-- .../src/main/nextflow/data/cid/fs/CidPath.groovy | 1 - .../nextflow/data/cid/fs/CidPathFactory.groovy | 1 - .../cid/fs/ResultsSeekableByteChannel.groovy | 3 +-- .../main/nextflow/data/cid/model/Checksum.groovy | 3 +-- .../nextflow/data/cid/model/DataOutput.groovy | 3 +-- .../main/nextflow/data/cid/model/DataPath.groovy | 3 +-- .../nextflow/data/cid/model/Parameter.groovy | 3 +-- .../nextflow/data/cid/model/TaskOutputs.groovy | 3 +-- .../main/nextflow/data/cid/model/TaskRun.groovy | 3 +-- .../main/nextflow/data/cid/model/Workflow.groovy | 3 +-- .../data/cid/model/WorkflowOutputs.groovy | 3 +-- .../nextflow/data/cid/model/WorkflowRun.groovy | 3 +-- .../main/nextflow/data/config/DataConfig.groovy | 3 +-- .../nextflow/data/config/DataStoreOpts.groovy | 3 +-- .../java.nio.file.spi.FileSystemProvider | 2 +- .../data/cid/CidHistoryRecordTest.groovy | 3 +-- .../nextflow/data/cid/CidObserverTest.groovy | 3 +-- .../data/cid/CidPropertyValidationTest.groovy | 3 +-- .../test/nextflow/data/cid/CidUtilsTest.groovy | 16 ++++++++++++++++ .../data/cid/DefaultCidHistoryLogTest.groovy | 3 +-- .../data/cid/DefaultCidStoreFactoryTest.groovy | 1 - .../nextflow/data/cid/DefaultCidStoreTest.groovy | 3 +-- .../data/cid/cli/CidCommandImplTest.groovy | 16 ++++++++++++++++ .../data/cid/fs/CidFileSystemProviderTest.groovy | 3 +-- .../test/nextflow/data/cid/fs/CidPathTest.groovy | 3 +-- .../data/cid/fs/CifPathFactoryTest.groovy | 3 +-- .../data/cid/serde/CidEncoderTest.groovy | 16 ++++++++++++++++ .../nextflow/data/config/DataConfigTest.groovy | 3 +-- 43 files changed, 81 insertions(+), 71 deletions(-) diff --git a/modules/nf-cid/build.gradle b/modules/nf-cid/build.gradle index 6a7edfabd6..4d7405bfc5 100644 --- a/modules/nf-cid/build.gradle +++ b/modules/nf-cid/build.gradle @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * 
Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy index 3b71e911e8..48621b96a2 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid @@ -54,4 +53,4 @@ interface CidHistoryLog { */ CidHistoryRecord getRecord(UUID sessionId) -} \ No newline at end of file +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy index 03390fca62..c80a2ce65f 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index 508fac8f83..611f7cbe6d 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy index 34ce676593..1826f06b5c 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy index bfff2f9928..f90cd4249f 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy index 0e18cff453..3b6befd1dd 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy index 482fdeafde..bcec2e1989 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy index 6b3bbeda9c..f9e6144c28 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy index c421023ae1..a9c374dcc8 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy index 88eeaf41a3..f9feba03a9 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy index efd68e9992..0eecaf679a 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.cli diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy index 356183e0b9..15ed3c7e7d 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.fs diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy index 0ea52625d0..cf2e1566de 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.fs diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy index 1c98ca3f84..0801b2936c 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.fs @@ -77,4 +76,4 @@ class CidMetadataPath extends CidPath { Object fileKey() { return null } } } -} \ No newline at end of file +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy index 49e0d3f7aa..8d96406e84 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.fs diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy index cd6031f137..567f047094 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid.fs diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy index 40a44c2569..3b3be03efb 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.fs @@ -74,4 +73,4 @@ class ResultsSeekableByteChannel implements SeekableByteChannel { @Override void close() { open = false } -} \ No newline at end of file +} diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy index 44c212b66c..aa653ba9f5 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid.model diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy index f455dee6a2..554d578c55 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.model diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy index 45a2462d6a..8688d4d1b4 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid.model diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Parameter.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Parameter.groovy index 11cbe4ee9d..29210442ec 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Parameter.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/Parameter.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.model diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy index ebf6a6522b..6fb90b5a23 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid.model diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy index b20094cd06..2c043ed392 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.model diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy index 90947d6478..fb42cb3bd1 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid.model diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy index eaad598136..51e97f7466 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.model diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy index ab943cf18b..c7567d34b2 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid.model diff --git a/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy b/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy index 1038ddfe18..ece424be4e 100644 --- a/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy +++ b/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.config diff --git a/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy b/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy index 8a4ee10335..09da0c6476 100644 --- a/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy +++ b/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.config diff --git a/modules/nf-cid/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider b/modules/nf-cid/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider index ba80b4b30a..d68acbd6d1 100644 --- a/modules/nf-cid/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider +++ b/modules/nf-cid/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider @@ -1,5 +1,5 @@ # -# Copyright 2013-2024, Seqera Labs +# Copyright 2013-2025, Seqera Labs # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy index 6a104dcafc..6478069051 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index a655c9e57e..78b98d005e 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy index e5cf7f74ec..c81a8d7116 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy index b127a5ee9d..8119337bb1 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy @@ -1,3 +1,19 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package nextflow.data.cid import nextflow.data.cid.model.Checksum diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy index a80a2aed98..64e922d18e 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy index 4f4f116a96..1ec482e71c 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy index 2c14d6edb7..d8018b64f3 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid diff --git a/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy index 364abed3a4..9d0fd89b2d 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy @@ -1,3 +1,19 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package nextflow.data.cid.cli import nextflow.SysEnv diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy index 90c603d0fe..673146c2e4 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.fs diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy index cadaeb3386..ce813da4fc 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.fs diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CifPathFactoryTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CifPathFactoryTest.groovy index 24e5f44fee..18cb2e0b8c 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CifPathFactoryTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CifPathFactoryTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.cid.fs diff --git a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy index 8217fc0420..65b38adb32 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy @@ -1,3 +1,19 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package nextflow.data.cid.serde import nextflow.data.cid.model.Checksum diff --git a/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy b/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy index 5eff1c7103..7b3e8a7944 100644 --- a/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package nextflow.data.config From 802a119dbabc10002031bffd4aa5375cff83c79a Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sun, 13 Apr 2025 18:15:40 -0400 Subject: [PATCH 43/72] Nit formatting [ci skip] Signed-off-by: Paolo Di Tommaso --- .../src/main/nextflow/data/cid/fs/CidPath.groovy | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy index 8d96406e84..0a3340802b 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy @@ -165,7 +165,7 @@ class CidPath implements Path, RealPathAware { // If there isn't metadata check the parent to check if it is a subfolder of a task/workflow output final currentPath = Path.of(filePath) final parent = Path.of(filePath).getParent() - if( parent) { + if( parent ) { ArrayList newChildren = new ArrayList() newChildren.add(currentPath.getFileName().toString()) newChildren.addAll(children) @@ -185,7 +185,6 @@ class CidPath implements Path, RealPathAware { } else { return 
generateCidMetadataPath(fs, filePath, results, children) } - } /** @@ -206,7 +205,8 @@ class CidPath implements Path, RealPathAware { throw new FileNotFoundException("Target path '$key#outputs' does not exist.") } return generateCidMetadataPath(fs, key, outputs, children) - } else { + } + else { return generateCidMetadataPath(fs, key, object, children) } } @@ -245,7 +245,7 @@ class CidPath implements Path, RealPathAware { final remain = parts[1..-1] + more.toList() return resolve0(fs, parts[0], remain as String[]) } - def result = Path.of(base) + final result = Path.of(base) return more ? result.resolve(more.join(SEPARATOR)).toString() : result.toString() } @@ -414,7 +414,7 @@ class CidPath implements Path, RealPathAware { @Override URI toUri() { - asUri("${SCHEME}://${filePath}${query ? '?' + query: ''}${fragment ? '#'+ fragment : ''}") + return asUri("${SCHEME}://${filePath}${query ? '?' + query: ''}${fragment ? '#'+ fragment : ''}") } String toUriString() { From 10082ca1a76b7fc0fedfd373b96d042b7f4376d5 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sun, 13 Apr 2025 18:22:03 -0400 Subject: [PATCH 44/72] Fix failing tests [ci fast] Signed-off-by: Paolo Di Tommaso --- .../src/main/groovy/nextflow/trace/TraceObserver.groovy | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy index 8830950787..ee937a1f53 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy @@ -154,6 +154,7 @@ interface TraceObserver { default void onFilePublish(Path destination, Path source){ onFilePublish(destination) } + /** * Method that is invoked when a output file is annotated * @param destination @@ -161,7 +162,7 @@ interface TraceObserver { * @param annotations * The annotations attached to this file */ - void onFilePublish(Path 
destination, Path source, Map annotations){ + default void onFilePublish(Path destination, Path source, Map annotations){ onFilePublish(destination, source) } From 7bff926df37067109a8643fded243ef028303952 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sun, 13 Apr 2025 18:42:55 -0400 Subject: [PATCH 45/72] Refactor RealPathAware to LogicalPath [ci fast] Signed-off-by: Paolo Di Tommaso --- ...lPathAware.groovy => LogicalDataPath.groovy} | 17 ++++++++--------- .../nextflow/processor/TaskProcessor.groovy | 4 ++-- .../main/nextflow/data/cid/fs/CidPath.groovy | 8 ++++++-- 3 files changed, 16 insertions(+), 13 deletions(-) rename modules/nextflow/src/main/groovy/nextflow/file/{RealPathAware.groovy => LogicalDataPath.groovy} (65%) diff --git a/modules/nextflow/src/main/groovy/nextflow/file/RealPathAware.groovy b/modules/nextflow/src/main/groovy/nextflow/file/LogicalDataPath.groovy similarity index 65% rename from modules/nextflow/src/main/groovy/nextflow/file/RealPathAware.groovy rename to modules/nextflow/src/main/groovy/nextflow/file/LogicalDataPath.groovy index e8d1903520..b980bb460e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/file/RealPathAware.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/file/LogicalDataPath.groovy @@ -17,21 +17,20 @@ package nextflow.file -import java.nio.file.LinkOption -import java.nio.file.Path +import java.nio.file.Path /** - * Marker interface for objects that represent a real path. + * Marker interface for a logical file path associated with a (real) target path. * - * This interface is used in the { @link nextflow.processor.TaskProcessor } when managing the foreign file staging. + * This interface is used in the {@link nextflow.processor.TaskProcessor} when managing the foreign file staging. * * @author Paolo Di Tommaso */ -interface RealPathAware { +interface LogicalDataPath { /** - * Returns the real path - * @param options - * @return + * Resolve the logical path to the target path. 
+ * + * @return The real {@link Path} object associated with this logical path. */ - Path toRealPath(LinkOption... options) + Path toTargetPath() } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 3f968f97a9..8e997c6ea7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -77,7 +77,7 @@ import nextflow.file.FileHelper import nextflow.file.FileHolder import nextflow.file.FilePatternSplitter import nextflow.file.FilePorter -import nextflow.file.RealPathAware +import nextflow.file.LogicalDataPath import nextflow.plugin.Plugins import nextflow.processor.tip.TaskTipProvider import nextflow.script.BaseScript @@ -1942,7 +1942,7 @@ class TaskProcessor { if( item instanceof Path || coerceToPath ) { def path = normalizeToPath(item) - if (path instanceof RealPathAware){ + if (path instanceof LogicalDataPath){ path = path.toRealPath() } def target = executor.isForeignFile(path) ? 
foreignFiles.addToForeign(path) : path diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy index 0a3340802b..76c48de376 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy @@ -20,7 +20,7 @@ import groovy.util.logging.Slf4j import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataOutput import nextflow.data.cid.serde.CidSerializable -import nextflow.file.RealPathAware +import nextflow.file.LogicalDataPath import nextflow.util.CacheHelper import nextflow.util.TestOnly @@ -48,7 +48,7 @@ import nextflow.file.FileHelper */ @Slf4j @CompileStatic -class CidPath implements Path, RealPathAware { +class CidPath implements Path, LogicalDataPath { static public final List SUPPORTED_CHECKSUM_ALGORITHMS=["nextflow"] static public final String SEPARATOR = '/' @@ -431,6 +431,10 @@ class CidPath implements Path, RealPathAware { return this.getTargetPath(true) } + Path toTargetPath() { + return getTargetPath(true) + } + protected Path getTargetPath(boolean resultsAsPath=false){ return findTarget(fileSystem, filePath, resultsAsPath, parseChildrenFormFragment(fragment)) } From a8765e6af393811b4861ad9885e2a59d692cc8ee Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sun, 13 Apr 2025 18:50:15 -0400 Subject: [PATCH 46/72] Minor change [ci skip] Signed-off-by: Paolo Di Tommaso --- .../nextflow/processor/TaskProcessor.groovy | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 8e997c6ea7..2d75d53b32 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -1879,6 +1879,13 @@ class TaskProcessor { return 
Collections.unmodifiableMap(result) } + protected Path resolvePath(Object item) { + final result = normalizeToPath(item) + return result instanceof LogicalDataPath + ? result.toTargetPath() + : result + } + /** * An input file parameter can be provided with any value other than a file. * This function normalize a generic value to a {@code Path} create a temporary file @@ -1889,7 +1896,6 @@ class TaskProcessor { * @return The {@code Path} that will be staged in the task working folder */ protected FileHolder normalizeInputToFile( Object input, String altName ) { - /* * when it is a local file, just return a reference holder to it */ @@ -1940,13 +1946,9 @@ class TaskProcessor { for( def item : allItems ) { if( item instanceof Path || coerceToPath ) { - def path = normalizeToPath(item) - - if (path instanceof LogicalDataPath){ - path = path.toRealPath() - } - def target = executor.isForeignFile(path) ? foreignFiles.addToForeign(path) : path - def holder = new FileHolder(target) + final path = resolvePath(item) + final target = executor.isForeignFile(path) ? 
foreignFiles.addToForeign(path) : path + final holder = new FileHolder(target) files << holder } else { From 3c93ee96cc44bcdcb541223a4570332b703fcae8 Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Tue, 15 Apr 2025 09:10:26 +0200 Subject: [PATCH 47/72] CID consolidation (#5969) [ci fast] Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdRun.groovy | 3 +- .../groovy/nextflow/cli/CmdCidTest.groovy | 12 +- .../main/nextflow/data/cid/CidObserver.groovy | 111 ++++++++++++------ .../data/cid/CidPropertyValidator.groovy | 12 +- .../main/nextflow/data/cid/CidUtils.groovy | 40 +++---- .../data/cid/cli/CidCommandImpl.groovy | 2 +- .../main/nextflow/data/cid/fs/CidPath.groovy | 6 +- .../nextflow/data/cid/model/Annotation.groovy | 33 ++++++ .../nextflow/data/cid/model/DataOutput.groovy | 8 +- .../data/cid/model/TaskOutputs.groovy | 5 +- .../nextflow/data/cid/model/TaskRun.groovy | 2 +- .../data/cid/model/WorkflowOutputs.groovy | 7 +- .../data/cid/model/WorkflowRun.groovy | 6 +- .../nextflow/data/cid/CidObserverTest.groovy | 57 ++++++--- .../data/cid/CidPropertyValidationTest.groovy | 6 +- .../nextflow/data/cid/CidUtilsTest.groovy | 32 +++-- .../data/cid/DefaultCidStoreTest.groovy | 24 ++-- .../data/cid/cli/CidCommandImplTest.groovy | 20 ++-- .../nextflow/data/cid/fs/CidPathTest.groovy | 15 +-- .../data/cid/serde/CidEncoderTest.groovy | 10 +- .../serde/gson/OffsetDateTimeAdapter.groovy | 6 + .../data/cid/h2/H2CidStoreTest.groovy | 12 +- 22 files changed, 278 insertions(+), 151 deletions(-) create mode 100644 modules/nf-cid/src/main/nextflow/data/cid/model/Annotation.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy index 67ca12c2c3..4f3bbf423e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy @@ -354,7 +354,8 @@ class CmdRun extends CmdBase implements HubOptions { 
runner.session.disableJobsCancellation = getDisableJobsCancellation() final isTowerEnabled = config.navigate('tower.enabled') as Boolean - if( isTowerEnabled || log.isTraceEnabled() ) + final isDataEnabled = config.navigate("workflow.data.enabled") as Boolean + if( isTowerEnabled || isDataEnabled || log.isTraceEnabled() ) runner.session.resolvedConfig = ConfigBuilder.resolveConfig(scriptFile.parent, this) // note config files are collected during the build process // this line should be after `ConfigBuilder#build` diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy index 7989157ff6..3207203475 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy @@ -35,7 +35,7 @@ import org.junit.Rule import spock.lang.Specification import test.OutputCapture -import java.time.Instant +import java.time.OffsetDateTime /** * CLI cid Tests @@ -136,7 +136,7 @@ class CmdCidTest extends Specification { def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.now() def encoder = new CidEncoder().withPrettyPrint(true) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam","cid://12345/","cid://123987/", 1234, time, time, null) @@ -182,7 +182,7 @@ class CmdCidTest extends Specification { then: stdout.size() == 1 - stdout[0] == "No entries found for cid://12345" + stdout[0] == "Error loading cid://12345. Cid object 12345 not found." 
cleanup: folder?.deleteDir() @@ -208,7 +208,7 @@ class CmdCidTest extends Specification { Files.createDirectories(cidFile4.parent) Files.createDirectories(cidFile5.parent) def encoder = new CidEncoder() - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.now() def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam", "cid://45678",null, 1234, time, time, null) cidFile.text = encoder.encode(entry) @@ -279,7 +279,7 @@ class CmdCidTest extends Specification { getOptions() >> new CliOptions(config: [configFile.toString()]) } def encoder = new CidEncoder().withPrettyPrint(true) - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.now() def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam", "cid://12345", "cid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) @@ -314,7 +314,7 @@ class CmdCidTest extends Specification { getOptions() >> new CliOptions(config: [configFile.toString()]) } def encoder = new CidEncoder().withPrettyPrint(true) - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.now() def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam", "cid://12345", "cid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index 611f7cbe6d..b67ebca8bd 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -16,16 +16,18 @@ package nextflow.data.cid +import java.time.OffsetDateTime + import static nextflow.data.cid.fs.CidPath.* import java.nio.file.Files import java.nio.file.Path import java.nio.file.attribute.BasicFileAttributes -import java.time.Instant import 
groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.Session +import nextflow.data.cid.model.Annotation import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataOutput import nextflow.data.cid.model.DataPath @@ -39,11 +41,21 @@ import nextflow.file.FileHolder import nextflow.processor.TaskHandler import nextflow.processor.TaskRun import nextflow.script.ScriptMeta + +import nextflow.script.params.BaseParam +import nextflow.script.params.CmdEvalParam import nextflow.script.params.DefaultInParam +import nextflow.script.params.EachInParam +import nextflow.script.params.EnvInParam +import nextflow.script.params.EnvOutParam import nextflow.script.params.FileInParam import nextflow.script.params.FileOutParam import nextflow.script.params.InParam import nextflow.script.params.OutParam +import nextflow.script.params.StdInParam +import nextflow.script.params.StdOutParam +import nextflow.script.params.ValueInParam +import nextflow.script.params.ValueOutParam import nextflow.trace.TraceObserver import nextflow.trace.TraceRecord import nextflow.util.CacheHelper @@ -58,7 +70,18 @@ import nextflow.util.TestOnly @Slf4j @CompileStatic class CidObserver implements TraceObserver { - + private static Map, String> TaskParamToValue = [ + (StdOutParam) : "stdout", + (StdInParam) : "stdin", + (FileInParam) : "path", + (FileOutParam) : "path", + (ValueInParam) : "val", + (ValueOutParam): "val", + (EnvInParam) : "env", + (EnvOutParam) : "env", + (CmdEvalParam) : "eval", + (EachInParam) : "each" + ] private String executionHash private CidStore store private Session session @@ -91,9 +114,9 @@ class CidObserver implements TraceObserver { executionHash = storeWorkflowRun(normalizer) final executionUri = asUriString(executionHash) workflowResults = new WorkflowOutputs( - Instant.now(), + OffsetDateTime.now(), executionUri, - new HashMap() + new LinkedList() ) this.store.getHistoryLog().updateRunCid(session.uniqueId, executionUri) } @@ -101,8 +124,8 
@@ class CidObserver implements TraceObserver { @Override void onFlowComplete(){ if (this.workflowResults){ - workflowResults.createdAt = Instant.now() - final key = executionHash + SEPARATOR + 'outputs' + workflowResults.createdAt = OffsetDateTime.now() + final key = executionHash + '#outputs' this.store.save(key, workflowResults) } } @@ -142,7 +165,8 @@ class CidObserver implements TraceObserver { workflow, session.uniqueId.toString(), session.runName, - getNormalizedParams(session.params, normalizer) + getNormalizedParams(session.params, normalizer), + session.resolvedConfig ) final executionHash = CacheHelper.hasher(value).hash().toString() store.save(executionHash, value) @@ -151,21 +175,12 @@ class CidObserver implements TraceObserver { protected static List getNormalizedParams(Map params, PathNormalizer normalizer){ final normalizedParams = new LinkedList() - params.each{String key, Object value -> - addNormalizedParam(key, value, normalizer, normalizedParams) + params.each{ String key, Object value -> + normalizedParams.add( new Parameter( getParameterType(value), key, normalizeValue(value, normalizer) ) ) } return normalizedParams } - private static void addNormalizedParam(String key, Object value, PathNormalizer normalizer, List normalizedParams){ - if( value instanceof Path ) - normalizedParams.add( new Parameter( Path.class.simpleName, key, normalizer.normalizePath( value as Path ) ) ) - else if ( value instanceof CharSequence ) - normalizedParams.add( new Parameter( String.class.simpleName, key, normalizer.normalizePath( value.toString() ) ) ) - else - normalizedParams.add( new Parameter( value.class.simpleName, key, value) ) - } - @Override void onProcessComplete(TaskHandler handler, TraceRecord trace) { storeTaskInfo(handler.task) @@ -180,8 +195,8 @@ class CidObserver implements TraceObserver { protected String storeTaskResults(TaskRun task, PathNormalizer normalizer){ final outputParams = getNormalizedTaskOutputs(task, normalizer) - final value = 
new TaskOutputs( asUriString(task.hash.toString()), asUriString(executionHash), Instant.now(), outputParams ) - final key = task.hash.toString() + SEPARATOR + 'outputs' + final value = new TaskOutputs( asUriString(task.hash.toString()), asUriString(executionHash), OffsetDateTime.now(), outputParams ) + final key = task.hash.toString() + '#outputs' store.save(key,value) return key } @@ -197,17 +212,21 @@ class CidObserver implements TraceObserver { private void manageTaskOutputParameter(OutParam key, LinkedList outputParams, value, TaskRun task, PathNormalizer normalizer) { if (key instanceof FileOutParam) { - outputParams.add(new Parameter(key.class.simpleName, key.name, manageFileOutParam(value, task))) + outputParams.add(new Parameter(getParameterType(key), key.name, manageFileOutParam(value, task))) } else { - if (value instanceof Path) - outputParams.add(new Parameter(key.class.simpleName, key.name, normalizer.normalizePath(value as Path))) - else if (value instanceof CharSequence) - outputParams.add(new Parameter(key.class.simpleName, key.name, normalizer.normalizePath(value.toString()))) - else - outputParams.add(new Parameter(key.class.simpleName, key.name, value)) + outputParams.add(new Parameter(getParameterType(key), key.name, normalizeValue(value, normalizer))) } } + private static Object normalizeValue(Object value, PathNormalizer normalizer) { + if (value instanceof Path) + return normalizer.normalizePath(value as Path) + else if (value instanceof CharSequence) + return normalizer.normalizePath(value.toString()) + else + return value + } + private Object manageFileOutParam(Object value, TaskRun task) { if (value == null) { throw new IllegalArgumentException("Unexpected output null for task '${task.name}'") @@ -277,12 +296,12 @@ class CidObserver implements TraceObserver { protected String getTaskOutputKey(TaskRun task, Path path) { final rel = getTaskRelative(task, path) - return task.hash.toString() + SEPARATOR + 'outputs' + SEPARATOR + rel + return 
task.hash.toString() + SEPARATOR + rel } protected String getWorkflowOutputKey(Path destination) { final rel = getWorkflowRelative(destination) - return executionHash + SEPARATOR + 'outputs' + SEPARATOR + rel + return executionHash + SEPARATOR + rel } protected String getTaskRelative(TaskRun task, Path path){ @@ -340,18 +359,25 @@ class CidObserver implements TraceObserver { attrs.size(), CidUtils.toDate(attrs?.creationTime()), CidUtils.toDate(attrs?.lastModifiedTime()), - annotations) + convertAnnotations(annotations)) store.save(key, value) } catch (Throwable e) { log.warn("Unexpected error storing published file '${destination.toUriString()}' for workflow '${executionHash}'", e) } } + private static List convertAnnotations(Map annotations){ + if( !annotations ) + return null + final converted = new LinkedList() + annotations.forEach { Object key, Object value -> converted.add(new Annotation(key.toString(), value)) } + return converted + } String getSourceReference(Path source){ final hash = FileHelper.getTaskHashFromPath(source, session.workDir) if (hash) { final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() - return asUriString(hash.toString(), 'outputs', target) + return asUriString(hash.toString(), target) } final storeDirReference = outputsStoreDirCid.get(source.toString()) return storeDirReference ? 
asUriString(storeDirReference) : null @@ -364,7 +390,22 @@ class CidObserver implements TraceObserver { @Override void onWorkflowPublish(String name, Object value){ - workflowResults.outputs.put(name,convertPathsToCidReferences(value)) + workflowResults.outputs.add(new Parameter(getParameterType(value), name, convertPathsToCidReferences(value))) + } + + protected static String getParameterType(Object param) { + if( param instanceof BaseParam ) + return TaskParamToValue.get(param.class) + // return generic types + if( param instanceof Path ) + return Path.simpleName + if (param instanceof CharSequence) + return String.simpleName + if( param instanceof Collection ) + return Collection.simpleName + if( param instanceof Map) + return Map.simpleName + return param.class.simpleName } private Object convertPathsToCidReferences(Object value){ @@ -421,12 +462,10 @@ class CidObserver implements TraceObserver { protected List manageTaskInputParameters(Map inputs, PathNormalizer normalizer) { List managedInputs = new LinkedList() inputs.forEach{ param, value -> - final type = param.class.simpleName - final name = param.name if( param instanceof FileInParam ) - managedInputs.add( new Parameter( type, name, manageFileInParam( (List)value , normalizer) ) ) + managedInputs.add( new Parameter( getParameterType(param), param.name, manageFileInParam( (List)value , normalizer) ) ) else if( !(param instanceof DefaultInParam) ) - managedInputs.add( new Parameter( type, name, value) ) + managedInputs.add( new Parameter( getParameterType(param), param.name, value) ) } return managedInputs } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy index 7a6bd674c3..100788b72c 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy @@ -16,7 +16,7 @@ package nextflow.data.cid -import 
groovy.util.logging.Slf4j +import nextflow.data.cid.model.Annotation import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.Parameter @@ -34,7 +34,7 @@ import java.lang.reflect.Field */ class CidPropertyValidator { - private static List CID_MODEL_CLASSES = [Workflow, WorkflowRun, WorkflowOutputs, TaskRun, TaskOutputs, DataOutput, DataPath, Parameter, Checksum] + private static List CID_MODEL_CLASSES = [Workflow, WorkflowRun, WorkflowOutputs, TaskRun, TaskOutputs, DataOutput, DataPath, Parameter, Checksum, Annotation] private Set validProperties CidPropertyValidator(){ @@ -46,7 +46,7 @@ class CidPropertyValidator { } } - void validate(String[] properties) { + void validate(Collection properties) { for(String property: properties) { if (!(property in this.validProperties)) { throw new IllegalArgumentException("Property '$property' doesn't exist in the CID model") @@ -54,6 +54,12 @@ class CidPropertyValidator { } } + void validateQueryParams (Map params){ + for(String key: params.keySet()) { + validate(key.tokenize('.')) + } + } + diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy index bcec2e1989..f1476bf61e 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy @@ -26,7 +26,8 @@ import nextflow.data.cid.serde.CidSerializable import nextflow.serde.gson.GsonEncoder import java.nio.file.attribute.FileTime -import java.time.Instant +import java.time.OffsetDateTime +import java.time.ZoneOffset /** * Utils class for CID. @@ -51,17 +52,12 @@ class CidUtils { */ static Collection query(CidStore store, URI uri) { String key = uri.authority ? uri.authority + uri.path : uri.path - try { - if (key == CidPath.SEPARATOR) { - return globalSearch(store, uri) - } else { - final parameters = uri.query ? 
parseQuery(uri.query) : null - final children = parseChildrenFormFragment(uri.fragment) - return searchPath(store, key, parameters, children ) - } - } catch(Throwable e){ - log.debug("Exception querying $uri. $e.message") - return [] + if (key == CidPath.SEPARATOR) { + return globalSearch(store, uri) + } else { + final parameters = uri.query ? parseQuery(uri.query) : null + final children = parseChildrenFormFragment(uri.fragment) + return searchPath(store, key, parameters, children ) } } @@ -95,6 +91,7 @@ class CidUtils { if( !fragment ) return EMPTY_ARRAY final children = fragment.tokenize('.') + new CidPropertyValidator().validate(children) return children as String[] } @@ -142,7 +139,7 @@ class CidUtils { static Object getSubObject(CidStore store, String key, CidSerializable object, String[] children) { if( isSearchingOutputs(object, children) ) { // When asking for a Workflow or task output retrieve the outputs description - final outputs = store.load("${key}/outputs") + final outputs = store.load("${key}#outputs") if (!outputs) return null return navigate(outputs, children.join('.')) @@ -187,10 +184,11 @@ class CidUtils { if( !queryString ) { return [:] } - return queryString.split('&').collectEntries { + final params = queryString.split('&').collectEntries { it.split('=').collect { URLDecoder.decode(it, 'UTF-8') } } as Map - + new CidPropertyValidator().validateQueryParams(params) + return params } /** @@ -276,14 +274,14 @@ class CidUtils { } /** - * Helper function to convert from FileTime to ISO 8601. + * Helper function to convert from FileTime to ISO 8601 with offser. 
* * @param time File time to convert - * @return Instant or null in case of not available (null) + * @return or null in case of not available (null) */ - static Instant toDate(FileTime time){ + static OffsetDateTime toDate(FileTime time){ if (time) - return Instant.ofEpochMilli(time.toMillis()) + return time.toInstant().atOffset(ZoneOffset.UTC) else return null } @@ -294,10 +292,10 @@ class CidUtils { * @param date ISO formated time * @return Converted FileTime or null if date is not available (null or 'N/A') */ - static FileTime toFileTime(String date){ + static FileTime toFileTime(OffsetDateTime date){ if (!date) return null - return FileTime.from(Instant.parse(date)) + return FileTime.from(date.toInstant()) } /** diff --git a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy index 0eecaf679a..981e41105b 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy @@ -310,7 +310,7 @@ class CidCommandImpl implements CmdCid.CidCommand { try { println CidUtils.encodeSearchOutputs(store.search(args[0]).keySet().collect {asUriString(it)}, true) } catch (Throwable e){ - println "Exception searching for ${args[0]}. ${e.message}" + println "Error searching for ${args[0]}. 
${e.message}" } } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy index 76c48de376..0b9b83bcba 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy @@ -24,9 +24,6 @@ import nextflow.file.LogicalDataPath import nextflow.util.CacheHelper import nextflow.util.TestOnly -import java.nio.file.attribute.FileTime -import java.time.Instant - import static nextflow.data.cid.fs.CidFileSystemProvider.* import static nextflow.data.cid.CidUtils.* @@ -37,6 +34,7 @@ import java.nio.file.ProviderMismatchException import java.nio.file.WatchEvent import java.nio.file.WatchKey import java.nio.file.WatchService +import java.time.OffsetDateTime import groovy.transform.CompileStatic import nextflow.file.FileHelper @@ -212,7 +210,7 @@ class CidPath implements Path, LogicalDataPath { } private static CidMetadataPath generateCidMetadataPath(CidFileSystem fs, String key, Object object, String[] children){ - def creationTime = FileTime.from(navigate(object, 'createdAt') as Instant ?: Instant.now()) + def creationTime = toFileTime(navigate(object, 'createdAt') as OffsetDateTime ?: OffsetDateTime.now()) final output = children ? navigate(object, children.join('.')) : object if( !output ) { throw new FileNotFoundException("Target path '$key#${children.join('.')}' does not exist.") diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Annotation.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/Annotation.groovy new file mode 100644 index 0000000000..c6f3db6917 --- /dev/null +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/Annotation.groovy @@ -0,0 +1,33 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.data.cid.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * Models an Annotation. + * + * @author Jorge Ejarque annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy index 6fb90b5a23..929ecc0345 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy @@ -21,6 +21,7 @@ import groovy.transform.CompileStatic import nextflow.data.cid.serde.CidSerializable import java.time.Instant +import java.time.OffsetDateTime /** * Models task results. 
@@ -41,7 +42,7 @@ class TaskOutputs implements CidSerializable { /** * Creation date of this task outputs description */ - Instant createdAt + OffsetDateTime createdAt /** * Outputs of the task */ @@ -49,5 +50,5 @@ class TaskOutputs implements CidSerializable { /** * Annotations attached to the task outputs */ - Map annotations + List annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy index 2c043ed392..04f722cc33 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy @@ -79,5 +79,5 @@ class TaskRun implements CidSerializable { /** * Annotations attached to the task run */ - Map annotations + List annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy index 51e97f7466..c78fb6bd5d 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy @@ -21,6 +21,7 @@ import groovy.transform.CompileStatic import nextflow.data.cid.serde.CidSerializable import java.time.Instant +import java.time.OffsetDateTime /** * Models the results of a workflow execution. 
@@ -33,7 +34,7 @@ class WorkflowOutputs implements CidSerializable { /** * Creation date of the workflow outputs description */ - Instant createdAt + OffsetDateTime createdAt /** * Workflow run that generated the outputs */ @@ -41,9 +42,9 @@ class WorkflowOutputs implements CidSerializable { /** * Workflow outputs */ - Map outputs + List outputs /** * Annotations attached to the workflow outputs */ - Map annotations + List annotations } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy index c7567d34b2..c07e362578 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy @@ -44,8 +44,12 @@ class WorkflowRun implements CidSerializable { * Workflow parameters */ List params + /** + * Resolved Configuration + */ + String resolvedConfig /** * Annotations attached to the workflow run */ - Map annotations + List annotations } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index 78b98d005e..f560340af1 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -12,6 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
+ * */ package nextflow.data.cid @@ -21,10 +22,13 @@ import nextflow.data.cid.model.TaskOutputs import nextflow.file.FileHolder import nextflow.processor.TaskHandler import nextflow.script.TokenVar +import nextflow.script.params.EnvOutParam import nextflow.script.params.FileInParam import nextflow.script.params.FileOutParam import nextflow.script.params.InParam import nextflow.script.params.OutParam +import nextflow.script.params.StdInParam +import nextflow.script.params.StdOutParam import nextflow.script.params.ValueInParam import nextflow.script.params.ValueOutParam @@ -179,9 +183,24 @@ class CidObserverTest extends Specification { folder?.deleteDir() } + def 'should get parameter type' () { + expect: + CidObserver.getParameterType(PARAM) == STRING + where: + PARAM | STRING + new FileInParam(null, []) | "path" + new ValueOutParam(null, []) | "val" + new EnvOutParam(null, []) | "env" + new StdInParam(null, []) | "stdin" + new StdOutParam(null, []) | "stdout" + Path.of("test") | "Path" + ["test"] | "Collection" + [key:"value"] | "Map" + } + def 'should save task run' () { given: - def folder = Files.createTempDirectory('test').toRealPath() + def folder = Files.createTempDirectory('test') def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] def uniqueId = UUID.randomUUID() def workDir = folder.resolve("work") @@ -267,9 +286,9 @@ class CidObserverTest extends Specification { new Checksum(sourceHash, "nextflow", "standard"), new Checksum(scriptHash, "nextflow", "standard"), [ - new Parameter(FileInParam.simpleName, "file1", ['cid://78567890/outputs/file1.txt']), - new Parameter(FileInParam.simpleName, "file2", [[path: normalizer.normalizePath(file), checksum: [value:fileHash, algorithm: "nextflow", mode: "standard"]]]), - new Parameter(ValueInParam.simpleName, "id", "value") + new Parameter("path", "file1", ['cid://78567890/file1.txt']), + new Parameter("path", "file2", [[path: normalizer.normalizePath(file), checksum: [value:fileHash, 
algorithm: "nextflow", mode: "standard"]]]), + new Parameter("val", "id", "value") ], null, null, null, null, [:], [], "cid://hash", null) def dataOutput1 = new DataOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), "cid://1234567890", "cid://hash", "cid://1234567890", attrs1.size(), CidUtils.toDate(attrs1.creationTime()), CidUtils.toDate(attrs1.lastModifiedTime()) ) @@ -278,10 +297,10 @@ class CidObserverTest extends Specification { when: observer.onProcessComplete(handler, null ) - def taskRunResult = store.load(hash.toString()) as nextflow.data.cid.model.TaskRun - def dataOutputResult1 = store.load("${hash}/outputs/fileOut1.txt") as DataOutput - def dataOutputResult2 = store.load("${hash}/outputs/fileOut2.txt") as DataOutput - def taskOutputsResult = store.load("${hash}/outputs") as TaskOutputs + def taskRunResult = store.load("${hash.toString()}") + def dataOutputResult1 = store.load("${hash}/fileOut1.txt") as DataOutput + def dataOutputResult2 = store.load("${hash}/fileOut2.txt") as DataOutput + def taskOutputsResult = store.load("${hash}#outputs") as TaskOutputs then: taskRunResult == taskDescription dataOutputResult1 == dataOutput1 @@ -289,13 +308,13 @@ class CidObserverTest extends Specification { taskOutputsResult.taskRun == "cid://1234567890" taskOutputsResult.workflowRun == "cid://hash" taskOutputsResult.outputs.size() == 3 - taskOutputsResult.outputs.get(0).type == FileOutParam.simpleName + taskOutputsResult.outputs.get(0).type == "path" taskOutputsResult.outputs.get(0).name == "file1" - taskOutputsResult.outputs.get(0).value == "cid://1234567890/outputs/fileOut1.txt" - taskOutputsResult.outputs.get(1).type == FileOutParam.simpleName + taskOutputsResult.outputs.get(0).value == "cid://1234567890/fileOut1.txt" + taskOutputsResult.outputs.get(1).type == "path" taskOutputsResult.outputs.get(1).name == "file2" - taskOutputsResult.outputs.get(1).value == ["cid://1234567890/outputs/fileOut2.txt"] - 
taskOutputsResult.outputs.get(2).type == ValueOutParam.simpleName + taskOutputsResult.outputs.get(1).value == ["cid://1234567890/fileOut2.txt"] + taskOutputsResult.outputs.get(2).type == "val" taskOutputsResult.outputs.get(2).name == "id" taskOutputsResult.outputs.get(2).value == "value" @@ -341,7 +360,7 @@ class CidObserverTest extends Specification { when: observer.storeTaskOutput(task, outFile) then: - folder.resolve(".meta/${hash}/outputs/foo/bar/file.bam/.data.json").text == new CidEncoder().encode(output) + folder.resolve(".meta/${hash}/foo/bar/file.bam/.data.json").text == new CidEncoder().encode(output) cleanup: folder?.deleteDir() @@ -502,9 +521,9 @@ class CidObserverTest extends Specification { def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes) def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() def output1 = new DataOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), - "cid://123987/outputs/file.bam", "$CID_PROT${observer.executionHash}", null, + "cid://123987/file.bam", "$CID_PROT${observer.executionHash}", null, attrs1.size(), CidUtils.toDate(attrs1.creationTime()), CidUtils.toDate(attrs1.lastModifiedTime()) ) - folder.resolve(".meta/${observer.executionHash}/outputs/foo/file.bam/.data.json").text == encoder.encode(output1) + folder.resolve(".meta/${observer.executionHash}/foo/file.bam/.data.json").text == encoder.encode(output1) when: 'publish without source path' def outFile2 = outputDir.resolve('foo/file2.bam') @@ -518,14 +537,14 @@ class CidObserverTest extends Specification { def output2 = new DataOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), "cid://${observer.executionHash}" , "cid://${observer.executionHash}", null, attrs2.size(), CidUtils.toDate(attrs2.creationTime()), CidUtils.toDate(attrs2.lastModifiedTime()) ) - folder.resolve(".meta/${observer.executionHash}/outputs/foo/file2.bam/.data.json").text == encoder.encode(output2) + 
folder.resolve(".meta/${observer.executionHash}/foo/file2.bam/.data.json").text == encoder.encode(output2) when: 'Workflow complete' observer.onFlowComplete() then: 'Check history file is updated and Workflow Result is written in the cid store' def finalCid = store.getHistoryLog().getRecord(uniqueId).runCid.substring(CID_PROT.size()) - def resultsRetrieved = store.load("${finalCid}/outputs") as WorkflowOutputs - resultsRetrieved.outputs == [a: "cid://${observer.executionHash}/outputs/foo/file.bam", b: "cid://${observer.executionHash}/outputs/foo/file2.bam"] + def resultsRetrieved = store.load("${finalCid}#outputs") as WorkflowOutputs + resultsRetrieved.outputs == [new Parameter(Path.simpleName, "a", "cid://${observer.executionHash}/foo/file.bam"), new Parameter(Path.simpleName, "b", "cid://${observer.executionHash}/foo/file2.bam")] cleanup: folder?.deleteDir() diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy index c81a8d7116..6b4fe5930f 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy @@ -24,15 +24,15 @@ class CidPropertyValidationTest extends Specification{ def 'should throw exception when property does not exist'(){ when: - new CidPropertyValidator().validate(['value','not_existing'] as String[]) + new CidPropertyValidator().validate(['value','not_existing']) then: def e = thrown(IllegalArgumentException) - e.message == "Property 'not_existing' doesn't exist in the CID model" + e.message.startsWith( "Property 'not_existing' doesn't exist in the CID model") } def 'should not throw exception when property exist'(){ when: - new CidPropertyValidator().validate(['value', 'outputs'] as String[]) + new CidPropertyValidator().validate(['value', 'outputs']) then: noExceptionThrown() } diff --git 
a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy index 8119337bb1..1002a6835c 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy @@ -29,6 +29,8 @@ import spock.lang.TempDir import java.nio.file.Path import java.nio.file.attribute.FileTime import java.time.Instant +import java.time.OffsetDateTime +import java.time.ZoneOffset class CidUtilsTest extends Specification{ @@ -50,7 +52,7 @@ class CidUtilsTest extends Specification{ where: FILE_TIME | DATE null | null - FileTime.fromMillis(1234) | Instant.ofEpochMilli(1234) + FileTime.fromMillis(1234) | OffsetDateTime.ofInstant(Instant.ofEpochMilli(1234), ZoneOffset.UTC) } def 'should convert to FileTime'(){ @@ -59,7 +61,7 @@ class CidUtilsTest extends Specification{ where: FILE_TIME | DATE null | null - FileTime.fromMillis(1234) | Instant.ofEpochMilli(1234).toString() + FileTime.fromMillis(1234) | OffsetDateTime.ofInstant(Instant.ofEpochMilli(1234), ZoneOffset.UTC) } @@ -70,11 +72,11 @@ class CidUtilsTest extends Specification{ def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) - def outputs1 = new WorkflowOutputs(Instant.now(), "cid://testKey", [output: "name"] ) + def outputs1 = new WorkflowOutputs(OffsetDateTime.now(), "cid://testKey", [new Parameter( "String", "output", "name")] ) def cidStore = new DefaultCidStore() cidStore.open(config) cidStore.save(key, value1) - cidStore.save("$key/outputs", outputs1) + cidStore.save("$key#outputs", outputs1) when: List params = CidUtils.query(cidStore, new URI('cid://testKey#params')) @@ -87,14 +89,24 @@ class CidUtilsTest extends Specification{ List outputs = CidUtils.query(cidStore, new 
URI('cid://testKey#outputs')) then: outputs.size() == 1 - outputs[0] instanceof Map - outputs[0]['output'] == "name" + outputs[0] instanceof List + def param = (outputs[0] as List)[0] as Parameter + param.name == "output" - expect: - CidUtils.query(cidStore, new URI('cid://testKey#no-exist')) == [] - CidUtils.query(cidStore, new URI('cid://testKey#outputs.no-exist')) == [] - CidUtils.query(cidStore, new URI('cid://no-exist#something')) == [] + when: + CidUtils.query(cidStore, new URI('cid://testKey#no-exist')) + then: + thrown(IllegalArgumentException) + + when: + CidUtils.query(cidStore, new URI('cid://testKey#outputs.no-exist')) + then: + thrown(IllegalArgumentException) + when: + CidUtils.query(cidStore, new URI('cid://no-exist#something')) + then: + thrown(IllegalArgumentException) } def "should parse children elements form Fragment string"() { diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy index d8018b64f3..8b20bf0823 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy @@ -16,21 +16,25 @@ package nextflow.data.cid -import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowRun +import nextflow.data.cid.model.Annotation import java.nio.file.Files import java.nio.file.Path import java.time.Instant +import java.time.OffsetDateTime +import java.time.ZoneOffset import nextflow.data.cid.model.Checksum +import nextflow.data.cid.model.DataPath +import nextflow.data.cid.model.DataOutput +import nextflow.data.cid.model.Parameter +import nextflow.data.cid.model.Workflow +import nextflow.data.cid.model.WorkflowRun import nextflow.data.cid.serde.CidEncoder import nextflow.data.config.DataConfig import 
spock.lang.Specification import spock.lang.TempDir + /** * * @author Jorge Ejarque @@ -103,17 +107,17 @@ class DefaultCidStoreTest extends Specification { def 'should query' () { given: def uniqueId = UUID.randomUUID() - def time = Instant.ofEpochMilli(1234567) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(1234567), ZoneOffset.UTC) def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) def workflow = new Workflow([mainScript],"https://nextflow.io/nf-test/", "123456" ) def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [ new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] ) def key2 = "testKey2" - def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [key1:"value1", key2:"value2"]) + def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key1","value1"), new Annotation("key2","value2")]) def key3 = "testKey3" - def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [key2:"value2", key3:"value3"]) + def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key2","value2"), new Annotation("key3","value3")]) def key4 = "testKey4" - def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [key3:"value3", key4:"value4"]) + def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key4","value4"), new Annotation("key3","value3")]) def cidStore = new DefaultCidStore() cidStore.open(config) @@ -123,7 
+127,7 @@ class DefaultCidStoreTest extends Specification { cidStore.save(key4, value4) when: - def results = cidStore.search("type=DataOutput&annotations.key2=value2") + def results = cidStore.search("type=DataOutput&annotations.key=key2&annotations.value=value2") then: results.size() == 2 results.keySet().containsAll([key2,key3]) diff --git a/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy index 9d0fd89b2d..4a82216c57 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy @@ -38,6 +38,8 @@ import test.OutputCapture import java.nio.file.Files import java.nio.file.Path import java.time.Instant +import java.time.OffsetDateTime +import java.time.ZoneOffset class CidCommandImplTest extends Specification{ @@ -116,7 +118,7 @@ class CidCommandImplTest extends Specification{ given: def cidFile = storeLocation.resolve(".meta/12345/.data.json") Files.createDirectories(cidFile.parent) - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def encoder = new CidEncoder().withPrettyPrint(true) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam","cid://123987/", null, 1234, time, time, null) @@ -151,7 +153,7 @@ class CidCommandImplTest extends Specification{ then: stdout.size() == 1 - stdout[0] == "No entries found for cid://12345" + stdout[0] == "Error loading cid://12345. Cid object 12345 not found." 
} def 'should get lineage cid content' (){ @@ -170,7 +172,7 @@ class CidCommandImplTest extends Specification{ Files.createDirectories(cidFile4.parent) Files.createDirectories(cidFile5.parent) def encoder = new CidEncoder() - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam", "cid://45678", null, 1234, time, time, null) cidFile.text = encoder.encode(entry) @@ -239,7 +241,7 @@ class CidCommandImplTest extends Specification{ Files.createDirectories(cidFile.parent) Files.createDirectories(cidFile3.parent) def encoder = new CidEncoder() - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://12345", "cid://12345", null, 1234, time, time, null) cidFile.text = encoder.encode(entry) @@ -282,7 +284,7 @@ class CidCommandImplTest extends Specification{ def cidFile = storeLocation.resolve(".meta/12345/.data.json") Files.createDirectories(cidFile.parent) def encoder = new CidEncoder().withPrettyPrint(true) - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) def jsonSer = encoder.encode(entry) @@ -309,7 +311,7 @@ class CidCommandImplTest extends Specification{ def cidFile2 = storeLocation.resolve(".meta/67890/.data.json") Files.createDirectories(cidFile2.parent) def encoder = new CidEncoder().withPrettyPrint(true) - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new 
DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), @@ -338,7 +340,7 @@ class CidCommandImplTest extends Specification{ def cidFile2 = storeLocation.resolve(".meta/67890/.data.json") Files.createDirectories(cidFile2.parent) def encoder = new CidEncoder().withPrettyPrint(true) - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), @@ -388,7 +390,7 @@ class CidCommandImplTest extends Specification{ def cidFile = storeLocation.resolve(".meta/12345/.data.json") Files.createDirectories(cidFile.parent) def encoder = new CidEncoder().withPrettyPrint(true) - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) cidFile.text = encoder.encode(entry) @@ -439,7 +441,7 @@ class CidCommandImplTest extends Specification{ def cidFile2 = storeLocation.resolve(".meta/123987/file2.bam/.data.json") Files.createDirectories(cidFile2.parent) def encoder = new CidEncoder().withPrettyPrint(true) - def time = Instant.ofEpochMilli(123456789) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) def entry2 = new DataOutput("path/to/file2",new 
Checksum("42472qet","nextflow","standard"), diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy index ce813da4fc..7e54a2ee7f 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy @@ -32,12 +32,13 @@ import test.OutputCapture import java.nio.file.Files import java.nio.file.Path import java.nio.file.ProviderMismatchException -import java.time.Instant import spock.lang.Shared import spock.lang.Specification import spock.lang.Unroll +import java.time.OffsetDateTime + /** * CID Path Tests * @author Jorge Ejarque @@ -137,8 +138,8 @@ class CidPathTest extends Specification { cid.resolve('12345/.data.json').text = '{"type":"TaskRun"}' cid.resolve('12345/output1/.data.json').text = '{"type":"DataOutput", "path": "' + outputFolder.toString() + '"}' cid.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"DataOutput", "path": "' + outputFile.toString() + '"}' - def time = Instant.now() - def wfResultsMetadata = new CidEncoder().withPrettyPrint(true).encode(new WorkflowOutputs(time, "cid://1234", [a: "cid://1234/a.txt"])) + def time = OffsetDateTime.now() + def wfResultsMetadata = new CidEncoder().withPrettyPrint(true).encode(new WorkflowOutputs(time, "cid://1234", [new Parameter( "Path", "a", "cid://1234/a.txt")])) cid.resolve('5678/').mkdirs() cid.resolve('5678/.data.json').text = wfResultsMetadata @@ -188,12 +189,12 @@ class CidPathTest extends Specification { def result2 = new CidPath(cidFs, '5678#outputs').getTargetPath(true) then: result2 instanceof CidMetadataPath - result2.text == CidUtils.encodeSearchOutputs([a: "cid://1234/a.txt"], true) + result2.text == CidUtils.encodeSearchOutputs([new Parameter("Path","a", "cid://1234/a.txt")], true) when: 'Cid subobject does not exist' new CidPath(cidFs, '23456#notexists').getTargetPath(true) then: - thrown(FileNotFoundException) + 
thrown(IllegalArgumentException) cleanup: cid.resolve('12345').deleteDir() @@ -212,12 +213,12 @@ class CidPathTest extends Specification { p.text == '"repo"' when: 'outputs' - def outputs = new WorkflowOutputs(Instant.now(), "cid://12345", [ samples: ["sample1", "sample2"]]) + def outputs = new WorkflowOutputs(OffsetDateTime.now(), "cid://12345", [ new Parameter("Collection", "samples", ["sample1", "sample2"])]) cidFs.cidStore.save("12345/outputs", outputs) Path p2 = CidPath.getMetadataAsTargetPath(wf, cidFs, "12345", ["outputs"] as String[]) then: p2 instanceof CidMetadataPath - p2.text == CidUtils.encodeSearchOutputs([ samples: ["sample1", "sample2"]], true) + p2.text == CidUtils.encodeSearchOutputs([new Parameter("Collection", "samples", ["sample1", "sample2"])], true) when: 'child does not exists' CidPath.getMetadataAsTargetPath(wf, cidFs, "12345", ["no-exist"] as String[]) diff --git a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy index 65b38adb32..68456faa3d 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy @@ -27,7 +27,7 @@ import nextflow.data.cid.model.WorkflowOutputs import nextflow.data.cid.model.WorkflowRun import spock.lang.Specification -import java.time.Instant +import java.time.OffsetDateTime class CidEncoderTest extends Specification{ @@ -87,8 +87,8 @@ class CidEncoderTest extends Specification{ given: def encoder = new CidEncoder() and: - def time = Instant.now() - def wfResults = new WorkflowOutputs(time, "cid://1234", [a: "A", b: "B"]) + def time = OffsetDateTime.now() + def wfResults = new WorkflowOutputs(time, "cid://1234", [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")]) when: def encoded = encoder.encode(wfResults) def object = encoder.decode(encoded) @@ -98,7 +98,7 @@ class CidEncoderTest extends Specification{ def 
result = object as WorkflowOutputs result.createdAt == time result.workflowRun == "cid://1234" - result.outputs == [a: "A", b: "B"] + result.outputs == [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")] } def 'should encode and decode TaskRun'() { @@ -137,7 +137,7 @@ class CidEncoderTest extends Specification{ given: def encoder = new CidEncoder() and: - def time = Instant.now() + def time = OffsetDateTime.now() def parameter = new Parameter("a","b", "c") def wfResults = new TaskOutputs("cid://1234", "cid://5678", time, [parameter], null) when: diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy index 5ebccee529..2bf19f0462 100644 --- a/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy +++ b/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy @@ -16,6 +16,8 @@ package nextflow.serde.gson +import com.google.gson.stream.JsonToken + import java.time.Instant import java.time.OffsetDateTime @@ -38,6 +40,10 @@ class OffsetDateTimeAdapter extends TypeAdapter { @Override OffsetDateTime read(JsonReader reader) throws IOException { + if (reader.peek() == JsonToken.NULL) { + reader.nextNull(); + return null; + } return OffsetDateTime.parse(reader.nextString()) } } diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy index 343cfca066..96ed7db377 100644 --- a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy +++ b/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy @@ -17,6 +17,7 @@ package nextflow.data.cid.h2 +import nextflow.data.cid.model.Annotation import nextflow.data.cid.model.Checksum import nextflow.data.cid.model.DataPath import nextflow.data.cid.model.DataOutput @@ -28,6 +29,7 @@ import spock.lang.Shared import spock.lang.Specification import 
java.time.Instant +import java.time.OffsetDateTime /** * @@ -62,22 +64,22 @@ class H2CidStoreTest extends Specification { def uniqueId = UUID.randomUUID() def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") - def time = Instant.ofEpochMilli(1234567) + def time = OffsetDateTime.now() def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) def key2 = "testKey2" - def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [key1: "value1", key2: "value2"]) + def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [new Annotation("key1", "value1"), new Annotation("key2", "value2")]) def key3 = "testKey3" - def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [key2: "value2", key3: "value3"]) + def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [new Annotation("key2", "value2"), new Annotation("key3", "value3")]) def key4 = "testKey4" - def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [key3: "value3", key4: "value4"]) + def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [new Annotation("key3", "value3"), new Annotation("key4", "value4")]) store.save(key, value1) store.save(key2, value2) store.save(key3, value3) store.save(key4, 
value4) when: - def results = store.search("type=DataOutput&annotations.key2=value2") + def results = store.search("type=DataOutput&annotations.key=key2&annotations.value=value2") then: results.size() == 2 } From f7d84ba0f26b9d881482ab1a29cd9717933d2b44 Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 15 Apr 2025 11:22:30 +0200 Subject: [PATCH 48/72] fix merged publishop Signed-off-by: jorgee --- .../src/main/groovy/nextflow/extension/PublishOp.groovy | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index d011a5f168..caa144e5c1 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -97,8 +97,8 @@ class PublishOp { ? [saveAs: targetResolver] : [path: targetResolver] - if (opts.annotations instanceof Closure){ - final annotations = opts.annotations as Closure + if (publishOpts.annotations instanceof Closure){ + final annotations = publishOpts.annotations as Closure overrides.annotations = annotations.call(value) as Map } final publisher = PublishDir.create(publishOpts + overrides) @@ -219,7 +219,7 @@ class PublishOp { else { log.warn "Invalid extension '${ext}' for index file '${indexPath}' -- should be CSV, JSON, or YAML" } - session.notifyFilePublish(indexPath, null, opts.tags as Map) + session.notifyFilePublish(indexPath, null, publishOpts.tags as Map) } log.trace "Publish operator complete" From 1c06e6932b7d559d477b75790449b1871cb6b9e4 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Tue, 15 Apr 2025 12:21:13 +0200 Subject: [PATCH 49/72] Fix failing test on mac [ci fast] Signed-off-by: Paolo Di Tommaso --- .../nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy 
b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index f560340af1..f19fea5254 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -200,7 +200,7 @@ class CidObserverTest extends Specification { def 'should save task run' () { given: - def folder = Files.createTempDirectory('test') + def folder = Files.createTempDirectory('test').toRealPath() def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] def uniqueId = UUID.randomUUID() def workDir = folder.resolve("work") From 9f41c28515bb261fca9f6abcad19f27156bd5fcc Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 15 Apr 2025 15:36:11 +0200 Subject: [PATCH 50/72] change getTargetPath with flags to different methods Signed-off-by: jorgee --- .../data/cid/fs/CidFileSystemProvider.groovy | 12 +++--- .../main/nextflow/data/cid/fs/CidPath.groovy | 39 +++++++++++++++---- .../nextflow/data/cid/fs/CidPathTest.groovy | 6 +-- 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy index cf2e1566de..7ae06216c1 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy @@ -120,7 +120,7 @@ class CidFileSystemProvider extends FileSystemProvider { } private static InputStream newInputStream0(CidPath cid, OpenOption... 
options) throws IOException { - final realPath = cid.getTargetPath(true) + final realPath = cid.getTargetOrMetadataPath() if (realPath instanceof CidMetadataPath) return (realPath as CidMetadataPath).newInputStream() return realPath.fileSystem.provider().newInputStream(realPath, options) @@ -198,7 +198,7 @@ class CidFileSystemProvider extends FileSystemProvider { if (cid instanceof CidMetadataPath) { return (cid as CidMetadataPath).newSeekableByteChannel() } - final realPath = cid.getTargetPath(true) + final realPath = cid.getTargetOrMetadataPath() if (realPath instanceof CidMetadataPath) { return (realPath as CidMetadataPath).newSeekableByteChannel() } else { @@ -210,7 +210,7 @@ class CidFileSystemProvider extends FileSystemProvider { @Override DirectoryStream newDirectoryStream(Path path, DirectoryStream.Filter filter) throws IOException { final cid = toCidPath(path) - final real = cid.getTargetPath(false) + final real = cid.getTargetPath() final stream = real .getFileSystem() .provider() @@ -309,7 +309,7 @@ class CidFileSystemProvider extends FileSystemProvider { @Override boolean isHidden(Path path) throws IOException { - return toCidPath(path).getTargetPath(true).isHidden() + return toCidPath(path).getTargetOrMetadataPath().isHidden() } @Override @@ -327,7 +327,7 @@ class CidFileSystemProvider extends FileSystemProvider { } private void checkAccess0(CidPath cid, AccessMode... modes) { - final real = cid.getTargetPath(true) + final real = cid.getTargetOrMetadataPath() if (real instanceof CidMetadataPath) return real.fileSystem.provider().checkAccess(real, modes) @@ -356,7 +356,7 @@ class CidFileSystemProvider extends FileSystemProvider { } private A readAttributes0(CidPath cid, Class type, LinkOption... 
options) throws IOException { - final real = cid.getTargetPath(true) + final real = cid.getTargetOrMetadataPath() if (real instanceof CidMetadataPath) return (real as CidMetadataPath).readAttributes(type) return real.fileSystem.provider().readAttributes(real, type, options) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy index 0b9b83bcba..0fba3cf024 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy @@ -140,17 +140,26 @@ class CidPath implements Path, LogicalDataPath { protected String getFilePath(){ this.filePath } + /** - * Finds the target path of a CID path - **/ - protected static Path findTarget(CidFileSystem fs, String filePath, boolean resultsAsPath, String[] children=[]) throws Exception{ + * Finds the target path of a CID path. + * @param fs CID fileSystem associated to the CidPath to find + * @param filePath Path associated to the CidPath to find + * @param resultsAsPath True to return metadata descriptions as CidMetadataPath + * @param children Sub-object/path inside the description + * @return Path or CidMetadataPath associated to the CidPath + * @throws Exception + * IllegalArgumentException if the filepath, filesystem or its CidStore are null. + * FileNotFoundException if the filePath or children are not found in the CidStore. + */ + protected static Path findTarget(CidFileSystem fs, String filePath, boolean resultsAsPath, String[] children=[]) throws Exception { if( !fs ) throw new IllegalArgumentException("Cannot get target path for a relative CidPath") if( filePath.isEmpty() || filePath == SEPARATOR ) throw new IllegalArgumentException("Cannot get target path for an empty CidPath") final store = fs.getCidStore() if( !store ) - throw new Exception("CID store not found. Check Nextflow configuration.") + throw new IllegalArgumentException("CID store not found. 
Check Nextflow configuration.") final object = store.load(filePath) if ( object ){ if( object instanceof DataOutput ) { @@ -426,15 +435,29 @@ class CidPath implements Path, LogicalDataPath { @Override Path toRealPath(LinkOption... options) throws IOException { - return this.getTargetPath(true) + return this.getTargetOrMetadataPath() } Path toTargetPath() { - return getTargetPath(true) + return getTargetOrMetadataPath() + } + /** + * Get the path associated to a DataOutput metadata. + * + * @return Path associated to a DataOutput + * @throws FileNotFoundException if the metadata associated to the CidPath does not exist or its type is not a DataOutput. + */ + protected Path getTargetPath() { + return findTarget(fileSystem, filePath, false, parseChildrenFormFragment(fragment)) } - protected Path getTargetPath(boolean resultsAsPath=false){ - return findTarget(fileSystem, filePath, resultsAsPath, parseChildrenFormFragment(fragment)) + /** + * Get the path associated to any metadata object. + * @return Path associated to a DataOutput or CidMetadataFile with the metadata object for other types. + * @throws FileNotFoundException if the metadata associated to the CidPath does not exist. 
+ */ + protected Path getTargetOrMetadataPath(){ + return findTarget(fileSystem, filePath, true, parseChildrenFormFragment(fragment)) } @Override diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy index 7e54a2ee7f..7931da5de4 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy @@ -180,19 +180,19 @@ class CidPathTest extends Specification { thrown(FileNotFoundException) when: 'Cid description' - def result = new CidPath(cidFs, '5678').getTargetPath(true) + def result = new CidPath(cidFs, '5678').getTargetOrMetadataPath() then: result instanceof CidMetadataPath result.text == wfResultsMetadata when: 'Cid description subobject' - def result2 = new CidPath(cidFs, '5678#outputs').getTargetPath(true) + def result2 = new CidPath(cidFs, '5678#outputs').getTargetOrMetadataPath() then: result2 instanceof CidMetadataPath result2.text == CidUtils.encodeSearchOutputs([new Parameter("Path","a", "cid://1234/a.txt")], true) when: 'Cid subobject does not exist' - new CidPath(cidFs, '23456#notexists').getTargetPath(true) + new CidPath(cidFs, '23456#notexists').getTargetOrMetadataPath() then: thrown(IllegalArgumentException) From f43e46c6f731a876ff2f366835ea40ae9cb96cf8 Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 15 Apr 2025 15:53:21 +0200 Subject: [PATCH 51/72] change resolved config from string to map Signed-off-by: jorgee --- modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy | 4 +++- .../src/main/nextflow/data/cid/model/WorkflowRun.groovy | 2 +- .../nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy index b67ebca8bd..eafd82febd 100644 --- 
a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy @@ -16,6 +16,8 @@ package nextflow.data.cid +import nextflow.util.SecretHelper + import java.time.OffsetDateTime import static nextflow.data.cid.fs.CidPath.* @@ -166,7 +168,7 @@ class CidObserver implements TraceObserver { session.uniqueId.toString(), session.runName, getNormalizedParams(session.params, normalizer), - session.resolvedConfig + SecretHelper.hideSecrets(session.config.deepClone()) as Map ) final executionHash = CacheHelper.hasher(value).hash().toString() store.save(executionHash, value) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy index c07e362578..77903d5850 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy +++ b/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy @@ -47,7 +47,7 @@ class WorkflowRun implements CidSerializable { /** * Resolved Configuration */ - String resolvedConfig + Map resolvedConfig /** * Annotations attached to the workflow run */ diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy index f19fea5254..3c1d459c23 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy @@ -172,7 +172,7 @@ class CidObserverTest extends Specification { def observer = new CidObserver(session, store) def mainScript = new DataPath("file://${scriptFile.toString()}", new Checksum("78910", "nextflow", "standard")) def workflow = new Workflow([mainScript],"https://nextflow.io/nf-test/", "123456" ) - def workflowRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", []) + def workflowRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [], config) when: 
observer.onFlowCreate(session) observer.onFlowBegin() From 5208c388dfef21897e8f7c0a2b4f3f7ea9a26caf Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Tue, 15 Apr 2025 18:47:04 +0200 Subject: [PATCH 52/72] CID to lineage rename (#5977) Signed-off-by: jorgee Signed-off-by: Jorge Ejarque Signed-off-by: Paolo Di Tommaso Co-authored-by: Paolo Di Tommaso --- build.gradle | 6 +- modules/nextflow/build.gradle | 2 +- .../cli/{CmdCid.groovy => CmdLineage.groovy} | 48 +-- .../main/groovy/nextflow/cli/CmdRun.groovy | 2 +- .../main/groovy/nextflow/cli/Launcher.groovy | 2 +- ...mdCidTest.groovy => CmdLineageTest.groovy} | 185 ++++---- modules/{nf-cid => nf-lineage}/build.gradle | 0 .../lineage/DefaultLinHistoryLog.groovy} | 32 +- .../nextflow/lineage/DefaultLinStore.groovy} | 51 +-- .../lineage/DefaultLinStoreFactory.groovy} | 14 +- .../nextflow/lineage/LinHistoryLog.groovy} | 29 +- .../nextflow/lineage/LinHistoryRecord.groovy} | 20 +- .../main/nextflow/lineage/LinObserver.groovy} | 58 +-- .../lineage/LinObserverFactory.groovy} | 10 +- .../lineage/LinPropertyValidator.groovy} | 32 +- .../main/nextflow/lineage/LinStore.groovy} | 38 +- .../nextflow/lineage/LinStoreFactory.groovy} | 30 +- .../main/nextflow/lineage/LinUtils.groovy} | 76 ++-- .../lineage/cli/LinCommandImpl.groovy} | 114 ++--- .../lineage/config/LineageConfig.groovy} | 20 +- .../lineage/config/LineageStoreOpts.groovy} | 6 +- .../nextflow/lineage/fs/LinFileSystem.groovy} | 40 +- .../lineage/fs/LinFileSystemProvider.groovy} | 132 +++--- .../lineage/fs/LinMetadataPath.groovy} | 8 +- .../fs/LinMetadataSeekableByteChannel.groovy} | 6 +- .../main/nextflow/lineage/fs/LinPath.groovy} | 173 ++++---- .../lineage/fs/LinPathFactory.groovy} | 20 +- .../nextflow/lineage}/model/Annotation.groovy | 2 +- .../nextflow/lineage}/model/Checksum.groovy | 2 +- .../nextflow/lineage}/model/DataOutput.groovy | 6 +- .../nextflow/lineage}/model/DataPath.groovy | 2 +- .../nextflow/lineage}/model/Parameter.groovy | 2 +- 
.../lineage}/model/TaskOutputs.groovy | 7 +- .../nextflow/lineage}/model/TaskRun.groovy | 6 +- .../nextflow/lineage}/model/Workflow.groovy | 6 +- .../lineage}/model/WorkflowOutputs.groovy | 7 +- .../lineage}/model/WorkflowRun.groovy | 6 +- .../nextflow/lineage/serde/LinEncoder.groovy} | 26 +- .../lineage/serde/LinSerializable.groovy} | 6 +- .../src/resources/META-INF/extensions.idx | 6 +- .../java.nio.file.spi.FileSystemProvider | 2 +- .../lineage/DefaultLinHistoryLogTest.groovy} | 61 ++- .../DefaultLinStoreFactoryTest.groovy} | 10 +- .../lineage/DefaultLinStoreTest.groovy} | 76 ++-- .../lineage/LinHistoryRecordTest.groovy} | 24 +- .../nextflow/lineage/LinObserverTest.groovy} | 160 +++---- .../lineage/LinPropertyValidationTest.groovy} | 10 +- .../nextflow/lineage/LinUtilsTest.groovy} | 82 ++-- .../lineage/cli/LinCommandImplTest.groovy} | 278 ++++++------ .../lineage/config/LineageConfigTest.groovy} | 10 +- .../fs/LinFileSystemProviderTest.groovy} | 202 ++++----- .../lineage/fs/LinPathFactoryTest.groovy} | 38 +- .../nextflow/lineage/fs/LinPathTest.groovy} | 400 +++++++++--------- .../lineage}/model/ChecksumTest.groovy | 2 +- .../lineage/serde/LinEncoderTest.groovy} | 54 +-- packing.gradle | 4 +- .../{nf-cid-h2 => nf-lineage-h2}/build.gradle | 4 +- .../lineage/h2/H2LinHistoryLog.groovy} | 42 +- .../nextflow/lineage/h2/H2LinPlugin.groovy} | 6 +- .../nextflow/lineage/h2/H2LinStore.groovy} | 60 +-- .../lineage/h2/H2LinStoreFactory.groovy} | 16 +- .../src/resources/META-INF/MANIFEST.MF | 4 +- .../src/resources/META-INF/extensions.idx | 4 +- .../lineage/h2/H2LinHistoryLogTest.groovy} | 28 +- .../lineage/h2/H2LinStoreTest.groovy} | 35 +- settings.gradle | 4 +- 66 files changed, 1434 insertions(+), 1420 deletions(-) rename modules/nextflow/src/main/groovy/nextflow/cli/{CmdCid.groovy => CmdLineage.groovy} (84%) rename modules/nextflow/src/test/groovy/nextflow/cli/{CmdCidTest.groovy => CmdLineageTest.groovy} (64%) rename modules/{nf-cid => nf-lineage}/build.gradle (100%) 
rename modules/{nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy => nf-lineage/src/main/nextflow/lineage/DefaultLinHistoryLog.groovy} (66%) rename modules/{nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy => nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy} (74%) rename modules/{nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy => nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy} (79%) rename modules/{nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy => nf-lineage/src/main/nextflow/lineage/LinHistoryLog.groovy} (61%) rename modules/{nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy => nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy} (79%) rename modules/{nf-cid/src/main/nextflow/data/cid/CidObserver.groovy => nf-lineage/src/main/nextflow/lineage/LinObserver.groovy} (92%) rename modules/{nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy => nf-lineage/src/main/nextflow/lineage/LinObserverFactory.groovy} (79%) rename modules/{nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy => nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy} (69%) rename modules/{nf-cid/src/main/nextflow/data/cid/CidStore.groovy => nf-lineage/src/main/nextflow/lineage/LinStore.groovy} (55%) rename modules/{nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy => nf-lineage/src/main/nextflow/lineage/LinStoreFactory.groovy} (67%) rename modules/{nf-cid/src/main/nextflow/data/cid/CidUtils.groovy => nf-lineage/src/main/nextflow/lineage/LinUtils.groovy} (82%) rename modules/{nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy => nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy} (75%) rename modules/{nf-cid/src/main/nextflow/data/config/DataConfig.groovy => nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy} (69%) rename modules/{nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy => 
nf-lineage/src/main/nextflow/lineage/config/LineageStoreOpts.groovy} (90%) rename modules/{nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy => nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy} (74%) rename modules/{nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy => nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy} (72%) rename modules/{nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy => nf-lineage/src/main/nextflow/lineage/fs/LinMetadataPath.groovy} (89%) rename modules/{nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy => nf-lineage/src/main/nextflow/lineage/fs/LinMetadataSeekableByteChannel.groovy} (93%) rename modules/{nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy => nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy} (74%) rename modules/{nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy => nf-lineage/src/main/nextflow/lineage/fs/LinPathFactory.groovy} (68%) rename modules/{nf-cid/src/main/nextflow/data/cid => nf-lineage/src/main/nextflow/lineage}/model/Annotation.groovy (96%) rename modules/{nf-cid/src/main/nextflow/data/cid => nf-lineage/src/main/nextflow/lineage}/model/Checksum.groovy (97%) rename modules/{nf-cid/src/main/nextflow/data/cid => nf-lineage/src/main/nextflow/lineage}/model/DataOutput.groovy (93%) rename modules/{nf-cid/src/main/nextflow/data/cid => nf-lineage/src/main/nextflow/lineage}/model/DataPath.groovy (96%) rename modules/{nf-cid/src/main/nextflow/data/cid => nf-lineage/src/main/nextflow/lineage}/model/Parameter.groovy (96%) rename modules/{nf-cid/src/main/nextflow/data/cid => nf-lineage/src/main/nextflow/lineage}/model/TaskOutputs.groovy (89%) rename modules/{nf-cid/src/main/nextflow/data/cid => nf-lineage/src/main/nextflow/lineage}/model/TaskRun.groovy (93%) rename modules/{nf-cid/src/main/nextflow/data/cid => nf-lineage/src/main/nextflow/lineage}/model/Workflow.groovy (89%) rename modules/{nf-cid/src/main/nextflow/data/cid => 
nf-lineage/src/main/nextflow/lineage}/model/WorkflowOutputs.groovy (88%) rename modules/{nf-cid/src/main/nextflow/data/cid => nf-lineage/src/main/nextflow/lineage}/model/WorkflowRun.groovy (91%) rename modules/{nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy => nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy} (68%) rename modules/{nf-cid/src/main/nextflow/data/cid/serde/CidSerializable.groovy => nf-lineage/src/main/nextflow/lineage/serde/LinSerializable.groovy} (84%) rename modules/{nf-cid => nf-lineage}/src/resources/META-INF/extensions.idx (83%) rename modules/{nf-cid => nf-lineage}/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider (93%) rename modules/{nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy => nf-lineage/src/test/nextflow/lineage/DefaultLinHistoryLogTest.groovy} (61%) rename modules/{nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy => nf-lineage/src/test/nextflow/lineage/DefaultLinStoreFactoryTest.groovy} (85%) rename modules/{nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy => nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy} (67%) rename modules/{nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy => nf-lineage/src/test/nextflow/lineage/LinHistoryRecordTest.groovy} (66%) rename modules/{nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy => nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy} (80%) rename modules/{nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy => nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy} (80%) rename modules/{nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy => nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy} (77%) rename modules/{nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy => nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy} (62%) rename modules/{nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy => 
nf-lineage/src/test/nextflow/lineage/config/LineageConfigTest.groovy} (80%) rename modules/{nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy => nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy} (63%) rename modules/{nf-cid/src/test/nextflow/data/cid/fs/CifPathFactoryTest.groovy => nf-lineage/src/test/nextflow/lineage/fs/LinPathFactoryTest.groovy} (63%) rename modules/{nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy => nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy} (57%) rename modules/{nf-cid/src/test/nextflow/data/cid => nf-lineage/src/test/nextflow/lineage}/model/ChecksumTest.groovy (98%) rename modules/{nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy => nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy} (81%) rename plugins/{nf-cid-h2 => nf-lineage-h2}/build.gradle (94%) rename plugins/{nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy => nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinHistoryLog.groovy} (71%) rename plugins/{nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidPlugin.groovy => nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinPlugin.groovy} (89%) rename plugins/{nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy => nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy} (75%) rename plugins/{nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStoreFactory.groovy => nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStoreFactory.groovy} (72%) rename plugins/{nf-cid-h2 => nf-lineage-h2}/src/resources/META-INF/MANIFEST.MF (59%) rename plugins/{nf-cid-h2 => nf-lineage-h2}/src/resources/META-INF/extensions.idx (89%) rename plugins/{nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy => nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinHistoryLogTest.groovy} (82%) rename plugins/{nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy => nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy} (69%) diff --git 
a/build.gradle b/build.gradle index 5996479dad..111d33c937 100644 --- a/build.gradle +++ b/build.gradle @@ -237,7 +237,7 @@ task compile { def getRuntimeConfigs() { def names = subprojects - .findAll { prj -> prj.name in ['nextflow','nf-commons','nf-httpfs','nf-lang', 'nf-cid'] } + .findAll { prj -> prj.name in ['nextflow','nf-commons','nf-httpfs','nf-lang', 'nf-lineage'] } .collect { it.name } FileCollection result = null @@ -263,7 +263,7 @@ task exportClasspath { def home = System.getProperty('user.home') def all = getRuntimeConfigs() def libs = all.collect { File file -> /*println file.canonicalPath.replace(home, '$HOME');*/ file.canonicalPath; } - ['nextflow','nf-commons','nf-httpfs','nf-lang', 'nf-cid'].each {libs << file("modules/$it/build/libs/${it}-${version}.jar").canonicalPath } + ['nextflow','nf-commons','nf-httpfs','nf-lang', 'nf-lineage'].each {libs << file("modules/$it/build/libs/${it}-${version}.jar").canonicalPath } file('.launch.classpath').text = libs.unique().join(':') } } @@ -276,7 +276,7 @@ ext.nexusEmail = project.findProperty('nexusEmail') // `signing.keyId` property needs to be defined in the `gradle.properties` file ext.enableSignArchives = project.findProperty('signing.keyId') -ext.coreProjects = projects( ':nextflow', ':nf-commons', ':nf-httpfs', ':nf-lang', ':nf-cid' ) +ext.coreProjects = projects( ':nextflow', ':nf-commons', ':nf-httpfs', ':nf-lang', ':nf-lineage' ) configure(coreProjects) { group = 'io.nextflow' diff --git a/modules/nextflow/build.gradle b/modules/nextflow/build.gradle index 0035f7e7a5..9d25cfc9bd 100644 --- a/modules/nextflow/build.gradle +++ b/modules/nextflow/build.gradle @@ -51,7 +51,7 @@ dependencies { api 'io.seqera:lib-trace:0.1.0' testImplementation 'org.subethamail:subethasmtp:3.1.7' - testImplementation (project(':nf-cid')) + testImplementation (project(':nf-lineage')) // test configuration testFixturesApi ("org.apache.groovy:groovy-test:4.0.26") { exclude group: 'org.apache.groovy' } testFixturesApi 
("org.objenesis:objenesis:3.4") diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy similarity index 84% rename from modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy rename to modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy index 35d064e417..7b56c9595c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdCid.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy @@ -35,14 +35,14 @@ import org.pf4j.ExtensionPoint */ @CompileStatic @Parameters(commandDescription = "Explore workflows CID metadata") -class CmdCid extends CmdBase implements UsageAware { +class CmdLineage extends CmdBase implements UsageAware { - private static final String NAME = 'cid' + private static final String NAME = 'lineage' - interface CidCommand extends ExtensionPoint { + interface LinCommand extends ExtensionPoint { void log(ConfigMap config) - void show(ConfigMap config, List args) - void lineage(ConfigMap config, List args) + void describe(ConfigMap config, List args) + void render(ConfigMap config, List args) void diff(ConfigMap config, List args) void find(ConfigMap config, List args) } @@ -56,14 +56,14 @@ class CmdCid extends CmdBase implements UsageAware { private List commands = new ArrayList<>() - private CidCommand operation + private LinCommand operation private ConfigMap config - CmdCid() { + CmdLineage() { commands << new CmdLog() - commands << new CmdShow() - commands << new CmdLineage() + commands << new CmdDescribe() + commands << new CmdRender() commands << new CmdDiff() commands << new CmdFind() } @@ -92,7 +92,7 @@ class CmdCid extends CmdBase implements UsageAware { // init plugins Plugins.load(config) // load the command operations - this.operation = Plugins.getExtension(CidCommand) + this.operation = Plugins.getExtension(LinCommand) if( !operation ) throw new IllegalStateException("Unable to load CID plugin") // consume the first 
argument @@ -157,7 +157,7 @@ class CmdCid extends CmdBase implements UsageAware { @Override String getDescription() { - return 'Print the CID execution log' + return 'Print the Lineage execution log' } @Override @@ -177,16 +177,16 @@ class CmdCid extends CmdBase implements UsageAware { } } - class CmdShow implements SubCmd{ + class CmdDescribe implements SubCmd{ @Override String getName() { - return 'show' + return 'describe' } @Override String getDescription() { - return 'Print the description of a CID reference' + return 'Print the description of a Lineage ID' } void apply(List args) { @@ -196,24 +196,24 @@ class CmdCid extends CmdBase implements UsageAware { return } - operation.show(config, args) + operation.describe(config, args) } @Override void usage() { println description - println "Usage: nextflow $NAME $name " + println "Usage: nextflow $NAME $name " } } - class CmdLineage implements SubCmd { + class CmdRender implements SubCmd { @Override - String getName() { 'lineage' } + String getName() { 'render' } @Override String getDescription() { - return 'Render a lineage graph for a workflow output' + return 'Render the lineage graph for a workflow output' } void apply(List args) { @@ -223,13 +223,13 @@ class CmdCid extends CmdBase implements UsageAware { return } - operation.lineage(config, args) + operation.render(config, args) } @Override void usage() { println description - println "Usage: nextflow $NAME $name " + println "Usage: nextflow $NAME $name " } } @@ -241,7 +241,7 @@ class CmdCid extends CmdBase implements UsageAware { @Override String getDescription() { - return 'Show differences between two CID descriptions' + return 'Show differences between two lineage descriptions' } void apply(List args) { @@ -256,7 +256,7 @@ class CmdCid extends CmdBase implements UsageAware { @Override void usage() { println description - println "Usage: nextflow $NAME $name " + println "Usage: nextflow $NAME $name " } } @@ -268,7 +268,7 @@ class CmdCid extends CmdBase 
implements UsageAware { @Override String getDescription() { - return 'Find CID metadata descriptions matching with a query' + return 'Find Lineage metadata descriptions matching with a query' } void apply(List args) { diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy index 4f3bbf423e..3e7cc1048f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy @@ -354,7 +354,7 @@ class CmdRun extends CmdBase implements HubOptions { runner.session.disableJobsCancellation = getDisableJobsCancellation() final isTowerEnabled = config.navigate('tower.enabled') as Boolean - final isDataEnabled = config.navigate("workflow.data.enabled") as Boolean + final isDataEnabled = config.navigate("workflow.lineage.enabled") as Boolean if( isTowerEnabled || isDataEnabled || log.isTraceEnabled() ) runner.session.resolvedConfig = ConfigBuilder.resolveConfig(scriptFile.parent, this) // note config files are collected during the build process diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy index fb5c8180f9..5347f638a6 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy @@ -108,7 +108,7 @@ class Launcher { new CmdPlugin(), new CmdInspect(), new CmdLint(), - new CmdCid() + new CmdLineage() ] if(SecretsLoader.isEnabled()) diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy similarity index 64% rename from modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy rename to modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy index 3207203475..e25d5655cf 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdCidTest.groovy +++ 
b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy @@ -17,32 +17,29 @@ package nextflow.cli -import nextflow.data.cid.DefaultCidHistoryLog -import nextflow.data.cid.serde.CidEncoder - -import java.nio.file.Files - import nextflow.SysEnv import nextflow.dag.MermaidHtmlRenderer -import nextflow.data.cid.CidHistoryRecord -import nextflow.data.cid.CidStoreFactory -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.TaskRun +import nextflow.lineage.DefaultLinHistoryLog +import nextflow.lineage.LinHistoryRecord +import nextflow.lineage.LinStoreFactory +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.TaskRun +import nextflow.lineage.serde.LinEncoder import nextflow.plugin.Plugins +import java.nio.file.Files +import java.time.OffsetDateTime import org.junit.Rule import spock.lang.Specification import test.OutputCapture -import java.time.OffsetDateTime - /** - * CLI cid Tests + * CLI lineage Tests * * @author Jorge Ejarque */ -class CmdCidTest extends Specification { +class CmdLineageTest extends Specification { def setup() { // clear the environment to avoid the local env pollute the test env @@ -51,12 +48,12 @@ class CmdCidTest extends Specification { def cleanup() { Plugins.stop() - CidStoreFactory.reset() + LinStoreFactory.reset() SysEnv.pop() } def setupSpec() { - CidStoreFactory.reset() + LinStoreFactory.reset() } /* @@ -65,23 +62,23 @@ class CmdCidTest extends Specification { @Rule OutputCapture capture = new OutputCapture() - def 'should print executions cids' (){ + def 'should print executions lids' (){ given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() + 
configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() def historyFile = folder.resolve(".meta/.history") - def cidLog = new DefaultCidHistoryLog(historyFile) + def lidLog = new DefaultLinHistoryLog(historyFile) def uniqueId = UUID.randomUUID() def date = new Date(); def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } - cidLog.write("run_name", uniqueId, "cid://123456", date) - def recordEntry = "${CidHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tcid://123456".toString() + lidLog.write("run_name", uniqueId, "lid://123456", date) + def recordEntry = "${LinHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tlid://123456".toString() when: - def cidCmd = new CmdCid(launcher: launcher, args: ["log"]) - cidCmd.run() + def lidCmd = new CmdLineage(launcher: launcher, args: ["log"]) + lidCmd.run() def stdout = capture .toString() .readLines()// remove the log part @@ -101,15 +98,15 @@ class CmdCidTest extends Specification { given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() + configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() def historyFile = folder.resolve(".meta/.history") Files.createDirectories(historyFile.parent) def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } when: - def cidCmd = new CmdCid(launcher: launcher, args: ["log"]) - cidCmd.run() + def lidCmd = new CmdLineage(launcher: launcher, args: ["log"]) + lidCmd.run() def stdout = capture .toString() .readLines()// remove the log part @@ -120,32 +117,32 @@ class CmdCidTest extends Specification { then: stdout.size() == 1 - stdout[0] == "No workflow runs CIDs found." + stdout[0] == "No workflow runs LIDs found." 
cleanup: folder?.deleteDir() } - def 'should show cid content' (){ + def 'should show lid content' (){ given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() - def cidFile = folder.resolve(".meta/12345/.data.json") - Files.createDirectories(cidFile.parent) + configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() + def lidFile = folder.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } def time = OffsetDateTime.now() - def encoder = new CidEncoder().withPrettyPrint(true) + def encoder = new LinEncoder().withPrettyPrint(true) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam","cid://12345/","cid://123987/", 1234, time, time, null) + "lid://123987/file.bam","lid://12345/","lid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer - cidFile.text = jsonSer + lidFile.text = jsonSer when: - def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid://12345"]) - cidCmd.run() + def lidCmd = new CmdLineage(launcher: launcher, args: ["describe", "lid://12345"]) + lidCmd.run() def stdout = capture .toString() .readLines()// remove the log part @@ -161,18 +158,18 @@ class CmdCidTest extends Specification { folder?.deleteDir() } - def 'should warn if no cid content' (){ + def 'should warn if no lid content' (){ given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() + configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = 
'$folder'".toString() def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } when: - def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid://12345"]) - cidCmd.run() + def lidCmd = new CmdLineage(launcher: launcher, args: ["describe", "lid://12345"]) + lidCmd.run() def stdout = capture .toString() .readLines()// remove the log part @@ -182,74 +179,74 @@ class CmdCidTest extends Specification { then: stdout.size() == 1 - stdout[0] == "Error loading cid://12345. Cid object 12345 not found." + stdout[0] == "Error loading lid://12345. Lineage object 12345 not found." cleanup: folder?.deleteDir() } - def 'should get lineage cid content' (){ + def 'should get lineage lid content' (){ given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') def outputHtml = folder.resolve('lineage.html') - configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() + configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } - def cidFile = folder.resolve(".meta/12345/file.bam/.data.json") - def cidFile2 = folder.resolve(".meta/123987/file.bam/.data.json") - def cidFile3 = folder.resolve(".meta/123987/.data.json") - def cidFile4 = folder.resolve(".meta/45678/output.txt/.data.json") - def cidFile5 = folder.resolve(".meta/45678/.data.json") - Files.createDirectories(cidFile.parent) - Files.createDirectories(cidFile2.parent) - Files.createDirectories(cidFile3.parent) - Files.createDirectories(cidFile4.parent) - Files.createDirectories(cidFile5.parent) - def encoder = new CidEncoder() + def lidFile = folder.resolve(".meta/12345/file.bam/.data.json") + def lidFile2 = folder.resolve(".meta/123987/file.bam/.data.json") + def lidFile3 = folder.resolve(".meta/123987/.data.json") + def lidFile4 = 
folder.resolve(".meta/45678/output.txt/.data.json") + def lidFile5 = folder.resolve(".meta/45678/.data.json") + Files.createDirectories(lidFile.parent) + Files.createDirectories(lidFile2.parent) + Files.createDirectories(lidFile3.parent) + Files.createDirectories(lidFile4.parent) + Files.createDirectories(lidFile5.parent) + def encoder = new LinEncoder() def time = OffsetDateTime.now() def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://45678",null, 1234, time, time, null) - cidFile.text = encoder.encode(entry) + "lid://123987/file.bam", "lid://45678",null, 1234, time, time, null) + lidFile.text = encoder.encode(entry) entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987", "cid://45678", "cid://123987", 1234, time, time, null) - cidFile2.text = encoder.encode(entry) + "lid://123987", "lid://45678", "lid://123987", 1234, time, time, null) + lidFile2.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "foo", new Checksum("abcde2345","nextflow","standard"), new Checksum("abfsc2375","nextflow","standard"), [new Parameter( "ValueInParam", "sample_id","ggal_gut"), - new Parameter("FileInParam","reads",["cid://45678/output.txt"])], + new Parameter("FileInParam","reads",["lid://45678/output.txt"])], null, null, null, null, [:],[], null) - cidFile3.text = encoder.encode(entry) + lidFile3.text = encoder.encode(entry) entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://45678", "cid://45678", null, 1234, time, time, null) - cidFile4.text = encoder.encode(entry) + "lid://45678", "lid://45678", null, 1234, time, time, null) + lidFile4.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "bar", new Checksum("abfs2556","nextflow","standard"), new Checksum("abfsc2375","nextflow","standard"), null,null, null, null, null, [:],[], null) - cidFile5.text = encoder.encode(entry) + lidFile5.text = 
encoder.encode(entry) final network = """flowchart BT - cid://12345/file.bam@{shape: document, label: "cid://12345/file.bam"} - cid://123987/file.bam@{shape: document, label: "cid://123987/file.bam"} - cid://123987@{shape: process, label: "foo"} + lid://12345/file.bam@{shape: document, label: "lid://12345/file.bam"} + lid://123987/file.bam@{shape: document, label: "lid://123987/file.bam"} + lid://123987@{shape: process, label: "foo"} ggal_gut@{shape: document, label: "ggal_gut"} - cid://45678/output.txt@{shape: document, label: "cid://45678/output.txt"} - cid://45678@{shape: process, label: "bar"} - - cid://123987/file.bam -->cid://12345/file.bam - cid://123987 -->cid://123987/file.bam - ggal_gut -->cid://123987 - cid://45678/output.txt -->cid://123987 - cid://45678 -->cid://45678/output.txt + lid://45678/output.txt@{shape: document, label: "lid://45678/output.txt"} + lid://45678@{shape: process, label: "bar"} + + lid://123987/file.bam -->lid://12345/file.bam + lid://123987 -->lid://123987/file.bam + ggal_gut -->lid://123987 + lid://45678/output.txt -->lid://123987 + lid://45678 -->lid://45678/output.txt """ final template = MermaidHtmlRenderer.readTemplate() def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) when: - def cidCmd = new CmdCid(launcher: launcher, args: ["lineage", "cid://12345/file.bam", outputHtml.toString()]) - cidCmd.run() + def lidCmd = new CmdLineage(launcher: launcher, args: ["render", "lid://12345/file.bam", outputHtml.toString()]) + lidCmd.run() def stdout = capture .toString() .readLines()// remove the log part @@ -259,7 +256,7 @@ class CmdCidTest extends Specification { then: stdout.size() == 1 - stdout[0] == "Linage graph for cid://12345/file.bam rendered in ${outputHtml}" + stdout[0] == "Linage graph for lid://12345/file.bam rendered in ${outputHtml}" outputHtml.exists() outputHtml.text == expectedOutput @@ -272,22 +269,22 @@ class CmdCidTest extends Specification { given: def folder = 
Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() - def cidFile = folder.resolve(".meta/12345/.data.json") - Files.createDirectories(cidFile.parent) + configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() + def lidFile = folder.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } - def encoder = new CidEncoder().withPrettyPrint(true) + def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.now() def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://12345", "cid://123987/", 1234, time, time, null) + "lid://123987/file.bam", "lid://12345", "lid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer - cidFile.text = jsonSer + lidFile.text = jsonSer when: - def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid:///?type=DataOutput"]) - cidCmd.run() + def lidCmd = new CmdLineage(launcher: launcher, args: ["describe", "lid:///?type=DataOutput"]) + lidCmd.run() def stdout = capture .toString() .readLines()// remove the log part @@ -307,22 +304,22 @@ class CmdCidTest extends Specification { given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.data.enabled = true\nworkflow.data.store.location = '$folder'".toString() - def cidFile = folder.resolve(".meta/12345/.data.json") - Files.createDirectories(cidFile.parent) + configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() + def lidFile = folder.resolve(".meta/12345/.data.json") + 
Files.createDirectories(lidFile.parent) def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } - def encoder = new CidEncoder().withPrettyPrint(true) + def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.now() def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://12345", "cid://123987/", 1234, time, time, null) + "lid://123987/file.bam", "lid://12345", "lid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer - cidFile.text = jsonSer + lidFile.text = jsonSer when: - def cidCmd = new CmdCid(launcher: launcher, args: ["show", "cid:///?type=DataOutput"]) - cidCmd.run() + def lidCmd = new CmdLineage(launcher: launcher, args: ["describe", "lid:///?type=DataOutput"]) + lidCmd.run() def stdout = capture .toString() .readLines()// remove the log part diff --git a/modules/nf-cid/build.gradle b/modules/nf-lineage/build.gradle similarity index 100% rename from modules/nf-cid/build.gradle rename to modules/nf-lineage/build.gradle diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinHistoryLog.groovy similarity index 66% rename from modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/DefaultLinHistoryLog.groovy index f9e6144c28..f7a306f616 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidHistoryLog.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinHistoryLog.groovy @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package nextflow.data.cid +package nextflow.lineage import java.nio.file.Files import java.nio.file.Path @@ -22,62 +22,62 @@ import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.extension.FilesEx /** - * File to store a history of the workflow executions and their corresponding CIDs + * File to store a history of the workflow executions and their corresponding LIDs * * @author Jorge Ejarque */ @Slf4j @CompileStatic -class DefaultCidHistoryLog implements CidHistoryLog { +class DefaultLinHistoryLog implements LinHistoryLog { Path path - DefaultCidHistoryLog(Path folder) { + DefaultLinHistoryLog(Path folder) { this.path = folder if( !path.exists() ) Files.createDirectories(path) } - void write(String name, UUID key, String runCid, Date date = null) { + void write(String name, UUID key, String runLid, Date date = null) { assert key def timestamp = date ?: new Date() final recordFile = path.resolve(key.toString()) try { - recordFile.text = new CidHistoryRecord(timestamp, name, key, runCid).toString() - log.trace("Record for $key written in CID history log ${FilesEx.toUriString(this.path)}") + recordFile.text = new LinHistoryRecord(timestamp, name, key, runLid).toString() + log.trace("Record for $key written in lineage history log ${FilesEx.toUriString(this.path)}") }catch (Throwable e) { log.warn("Can't write record $key file ${FilesEx.toUriString(recordFile)}", e.message) } } - void updateRunCid(UUID id, String runCid) { + void updateRunLid(UUID id, String runLid) { assert id final recordFile = path.resolve(id.toString()) try { - def current = CidHistoryRecord.parse(path.resolve(id.toString()).text) - recordFile.text = new CidHistoryRecord(current.timestamp, current.runName, id, runCid).toString() + def current = LinHistoryRecord.parse(path.resolve(id.toString()).text) + recordFile.text = new LinHistoryRecord(current.timestamp, current.runName, id, runLid).toString() } catch (Throwable e) { log.warn("Can't read session $id file: 
${FilesEx.toUriString(recordFile)}", e.message) } } - List getRecords(){ - List list = new LinkedList() + List getRecords(){ + List list = new LinkedList() try { - this.path.eachFile { Path file -> list.add(CidHistoryRecord.parse(file.text))} + this.path.eachFile { Path file -> list.add(LinHistoryRecord.parse(file.text))} } catch (Throwable e) { - log.warn "Exception reading records from CID history folder: ${FilesEx.toUriString(this.path)}", e.message + log.warn "Exception reading records from lineage history folder: ${FilesEx.toUriString(this.path)}", e.message } return list.sort {it.timestamp } } - CidHistoryRecord getRecord(UUID id) { + LinHistoryRecord getRecord(UUID id) { assert id final recordFile = path.resolve(id.toString()) try { - return CidHistoryRecord.parse(recordFile.text) + return LinHistoryRecord.parse(recordFile.text) } catch( Throwable e ) { log.warn("Can't find session $id in file: ${FilesEx.toUriString(recordFile)}", e.message) return null diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy similarity index 74% rename from modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy index a9c374dcc8..28251a1012 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStore.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package nextflow.data.cid +package nextflow.lineage import groovy.transform.CompileStatic import groovy.util.logging.Slf4j @@ -25,63 +25,64 @@ import java.nio.file.Files import java.nio.file.Path import java.nio.file.attribute.BasicFileAttributes -import nextflow.data.cid.serde.CidEncoder -import nextflow.data.cid.serde.CidSerializable -import nextflow.data.config.DataConfig +import nextflow.lineage.serde.LinEncoder +import nextflow.lineage.serde.LinSerializable +import nextflow.lineage.config.LineageConfig import nextflow.exception.AbortOperationException import nextflow.file.FileHelper import nextflow.util.TestOnly /** - * Default Implementation for the a CID store. + * Default Implementation for the a lineage store. * * @author Paolo Di Tommaso */ @Slf4j @CompileStatic -class DefaultCidStore implements CidStore { +class DefaultLinStore implements LinStore { private static String HISTORY_FILE_NAME =".history" private static final String METADATA_FILE = '.data.json' private static final String METADATA_PATH = '.meta' + private static final String DEFAULT_LOCATION = 'lineage' private Path metaLocation private Path location - private CidHistoryLog historyLog - private CidEncoder encoder + private LinHistoryLog historyLog + private LinEncoder encoder - DefaultCidStore open(DataConfig config) { + DefaultLinStore open(LineageConfig config) { location = toLocationPath(config.store.location) metaLocation = location.resolve(METADATA_PATH) - encoder = new CidEncoder() + encoder = new LinEncoder() if( !Files.exists(metaLocation) && !Files.createDirectories(metaLocation) ) { - throw new AbortOperationException("Unable to create CID store directory: $metaLocation") + throw new AbortOperationException("Unable to create lineage store directory: $metaLocation") } - historyLog = new DefaultCidHistoryLog(metaLocation.resolve(HISTORY_FILE_NAME)) + historyLog = new DefaultLinHistoryLog(metaLocation.resolve(HISTORY_FILE_NAME)) return this } protected Path toLocationPath(String 
location) { return location ? FileHelper.toCanonicalPath(location) - : Path.of('.').toAbsolutePath().normalize().resolve('data') + : Path.of('.').toAbsolutePath().normalize().resolve(DEFAULT_LOCATION) } @Override - void save(String key, CidSerializable value) { + void save(String key, LinSerializable value) { final path = metaLocation.resolve("$key/$METADATA_FILE") Files.createDirectories(path.parent) - log.debug "Save CID file path: $path" + log.debug "Save LID file path: $path" path.text = encoder.encode(value) } @Override - CidSerializable load(String key) { + LinSerializable load(String key) { final path = metaLocation.resolve("$key/$METADATA_FILE") log.debug("Loading from path $path") if (path.exists()) - return encoder.decode(path.text) as CidSerializable + return encoder.decode(path.text) as LinSerializable log.debug("File for key $key not found") return null } @@ -96,7 +97,7 @@ class DefaultCidStore implements CidStore { } @Override - CidHistoryLog getHistoryLog(){ + LinHistoryLog getHistoryLog(){ return historyLog } @@ -104,16 +105,16 @@ class DefaultCidStore implements CidStore { void close() throws IOException { } @Override - Map search(String queryString) { + Map search(String queryString) { def params = null if (queryString) { - params = CidUtils.parseQuery(queryString) + params = LinUtils.parseQuery(queryString) } return searchAllFiles(params) } - private Map searchAllFiles (Map params) { - final results = new HashMap() + private Map searchAllFiles (Map params) { + final results = new HashMap() Files.walkFileTree(metaLocation, new FileVisitor() { @@ -125,9 +126,9 @@ class DefaultCidStore implements CidStore { @Override FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { if (file.name.startsWith('.data.json') ) { - final cidObject = encoder.decode(file.text) - if (CidUtils.checkParams(cidObject, params)){ - results.put(metaLocation.relativize(file.getParent()).toString(), cidObject as CidSerializable) + final lidObject 
= encoder.decode(file.text) + if (LinUtils.checkParams(lidObject, params)){ + results.put(metaLocation.relativize(file.getParent()).toString(), lidObject as LinSerializable) } } FileVisitResult.CONTINUE diff --git a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy similarity index 79% rename from modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy index f9feba03a9..846c0ebbbd 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/DefaultCidStoreFactory.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy @@ -13,28 +13,28 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package nextflow.data.cid +package nextflow.lineage import java.util.regex.Pattern import groovy.transform.CompileStatic -import nextflow.data.config.DataConfig +import nextflow.lineage.config.LineageConfig import nextflow.plugin.Priority /** - * Default Factory for CidStore. + * Default Factory for Lineage Store. 
* * @author Jorge Ejarque */ @CompileStatic @Priority(0) -class DefaultCidStoreFactory extends CidStoreFactory { +class DefaultLinStoreFactory extends LinStoreFactory { private static Pattern SCHEME = ~/^([a-zA-Z][a-zA-Z\d+\-.]*):/ private static List SUPPORTED_SCHEMES = ['file', 's3', 'gs', 'az'] @Override - boolean canOpen(DataConfig config) { + boolean canOpen(LineageConfig config) { final loc = config.store.location if( !loc ) { return true @@ -44,8 +44,8 @@ class DefaultCidStoreFactory extends CidStoreFactory { } @Override - protected CidStore newInstance(DataConfig config) { - return new DefaultCidStore() .open(config) + protected LinStore newInstance(LineageConfig config) { + return new DefaultLinStore() .open(config) } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryLog.groovy similarity index 61% rename from modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/LinHistoryLog.groovy index 48621b96a2..d95a110c60 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryLog.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryLog.groovy @@ -13,44 +13,43 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package nextflow.data.cid +package nextflow.lineage /** - * Interface to log workflow executions and their corresponding CIDs + * Interface to log workflow executions and their corresponding Lineage IDs * * @author Jorge Ejarque */ -interface CidHistoryLog { +interface LinHistoryLog { /** - * Write a workflow execution CidHistoryLog record. + * Write a workflow execution linage history log record. * * @param name Workflow execution name. * @param sessionId Workflow session ID. - * @param runCid Workflow run CID. - * @param resultsCid Workflow results CID. + * @param runLid Workflow run ID. 
*/ - void write(String name, UUID sessionId, String runCid) + void write(String name, UUID sessionId, String runLid) /** - * Updates the run CID for a given session ID. + * Updates the run LID for a given session ID. * * @param sessionId Workflow session ID. - * @param runCid Workflow run CID. + * @param runLid Workflow run Lineage ID. */ - void updateRunCid(UUID sessionId, String runCid) + void updateRunLid(UUID sessionId, String runLid) /** - * Get the store records in the CidHistoryLog. + * Get the store records in the Lineage History Log. * - * @return List stored CIDHistoryRecords. + * @return List of stored lineage history records. */ - List getRecords() + List getRecords() /** * Get the record for a given * @param sessionId Workflow session ID. - * @return CIDHistoryRecord for the given ID. + * @return LinHistoryRecord for the given ID. */ - CidHistoryRecord getRecord(UUID sessionId) + LinHistoryRecord getRecord(UUID sessionId) } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy similarity index 79% rename from modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy index c80a2ce65f..31dc2d9478 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidHistoryRecord.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package nextflow.data.cid +package nextflow.lineage import groovy.transform.CompileStatic import groovy.transform.EqualsAndHashCode @@ -22,34 +22,34 @@ import java.text.DateFormat import java.text.SimpleDateFormat /** - * Record of workflow executions and their corresponding CIDs + * Record of workflow executions and their corresponding Lineage IDs * * @author Jorge Ejarque */ @CompileStatic @EqualsAndHashCode(includes = 'runName,sessionId') -class CidHistoryRecord { +class LinHistoryRecord { public static final DateFormat TIMESTAMP_FMT = new SimpleDateFormat('yyyy-MM-dd HH:mm:ss') final Date timestamp final String runName final UUID sessionId - final String runCid + final String runLid - CidHistoryRecord(Date timestamp, String name, UUID sessionId, String runCid) { + LinHistoryRecord(Date timestamp, String name, UUID sessionId, String runLid) { this.timestamp = timestamp this.runName = name this.sessionId = sessionId - this.runCid = runCid + this.runLid = runLid } - protected CidHistoryRecord() {} + protected LinHistoryRecord() {} List toList() { def line = new ArrayList(4) line << (timestamp ? 
TIMESTAMP_FMT.format(timestamp) : '-') line << (runName ?: '-') line << (sessionId.toString()) - line << (runCid ?: '-') + line << (runLid ?: '-') } @Override @@ -57,10 +57,10 @@ class CidHistoryRecord { toList().join('\t') } - static CidHistoryRecord parse(String line) { + static LinHistoryRecord parse(String line) { def cols = line.tokenize('\t') if (cols.size() == 4) { - return new CidHistoryRecord(TIMESTAMP_FMT.parse(cols[0]), cols[1], UUID.fromString(cols[2]), cols[3]) + return new LinHistoryRecord(TIMESTAMP_FMT.parse(cols[0]), cols[1], UUID.fromString(cols[2]), cols[3]) } throw new IllegalArgumentException("Not a valid history entry: `$line`") } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy similarity index 92% rename from modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy index eafd82febd..39ceda8383 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserver.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -package nextflow.data.cid +package nextflow.lineage import nextflow.util.SecretHelper import java.time.OffsetDateTime -import static nextflow.data.cid.fs.CidPath.* +import static nextflow.lineage.fs.LinPath.* import java.nio.file.Files import java.nio.file.Path @@ -29,15 +29,15 @@ import java.nio.file.attribute.BasicFileAttributes import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.Session -import nextflow.data.cid.model.Annotation -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.TaskOutputs -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutputs -import nextflow.data.cid.model.WorkflowRun +import nextflow.lineage.model.Annotation +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.TaskOutputs +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowRun import nextflow.file.FileHelper import nextflow.file.FileHolder import nextflow.processor.TaskHandler @@ -65,13 +65,13 @@ import nextflow.util.PathNormalizer import nextflow.util.TestOnly /** - * Observer to write the generated workflow metadata in a CID store. + * Observer to write the generated workflow metadata in a lineage store. 
* * @author Paolo Di Tommaso */ @Slf4j @CompileStatic -class CidObserver implements TraceObserver { +class LinObserver implements TraceObserver { private static Map, String> TaskParamToValue = [ (StdOutParam) : "stdout", (StdInParam) : "stdin", @@ -85,13 +85,13 @@ class CidObserver implements TraceObserver { (EachInParam) : "each" ] private String executionHash - private CidStore store + private LinStore store private Session session private WorkflowOutputs workflowResults - private Map outputsStoreDirCid = new HashMap(10) + private Map outputsStoreDirLid = new HashMap(10) private PathNormalizer normalizer - CidObserver(Session session, CidStore store){ + LinObserver(Session session, LinStore store){ this.session = session this.store = store } @@ -120,7 +120,7 @@ class CidObserver implements TraceObserver { executionUri, new LinkedList() ) - this.store.getHistoryLog().updateRunCid(session.uniqueId, executionUri) + this.store.getHistoryLog().updateRunLid(session.uniqueId, executionUri) } @Override @@ -250,7 +250,7 @@ class CidObserver implements TraceObserver { protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { final codeChecksum = Checksum.ofNextflow(session.stubRun ? 
task.stubSource: task.source) final scriptChecksum = Checksum.ofNextflow(task.script) - final value = new nextflow.data.cid.model.TaskRun( + final value = new nextflow.lineage.model.TaskRun( session.uniqueId.toString(), task.getName(), codeChecksum, @@ -286,12 +286,12 @@ class CidObserver implements TraceObserver { asUriString(executionHash), asUriString(task.hash.toString()), attrs.size(), - CidUtils.toDate(attrs?.creationTime()), - CidUtils.toDate(attrs?.lastModifiedTime())) + LinUtils.toDate(attrs?.creationTime()), + LinUtils.toDate(attrs?.lastModifiedTime())) store.save(key, value) return key } catch (Throwable e) { - log.warn("Unexpected error storing CID output '${path.toUriString()}' for task '${task.name}'", e) + log.warn("Unexpected error storing lineage output '${path.toUriString()}' for task '${task.name}'", e) return path.toUriString() } } @@ -331,7 +331,7 @@ class CidObserver implements TraceObserver { if( storeDir && path.startsWith(storeDir) ) { final rel = storeDir.relativize(path) //If output stored in storeDir, keep the path in case it is used as workflow output - this.outputsStoreDirCid.put(path.toString(), asUriString(task.hash.toString(),rel.toString())) + this.outputsStoreDirLid.put(path.toString(), asUriString(task.hash.toString(),rel.toString())) return rel } return null @@ -359,8 +359,8 @@ class CidObserver implements TraceObserver { asUriString(executionHash), null, attrs.size(), - CidUtils.toDate(attrs?.creationTime()), - CidUtils.toDate(attrs?.lastModifiedTime()), + LinUtils.toDate(attrs?.creationTime()), + LinUtils.toDate(attrs?.lastModifiedTime()), convertAnnotations(annotations)) store.save(key, value) } catch (Throwable e) { @@ -381,7 +381,7 @@ class CidObserver implements TraceObserver { final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() return asUriString(hash.toString(), target) } - final storeDirReference = outputsStoreDirCid.get(source.toString()) + final storeDirReference = 
outputsStoreDirLid.get(source.toString()) return storeDirReference ? asUriString(storeDirReference) : null } @@ -392,7 +392,7 @@ class CidObserver implements TraceObserver { @Override void onWorkflowPublish(String name, Object value){ - workflowResults.outputs.add(new Parameter(getParameterType(value), name, convertPathsToCidReferences(value))) + workflowResults.outputs.add(new Parameter(getParameterType(value), name, convertPathsToLidReferences(value))) } protected static String getParameterType(Object param) { @@ -410,7 +410,7 @@ class CidObserver implements TraceObserver { return param.class.simpleName } - private Object convertPathsToCidReferences(Object value){ + private Object convertPathsToLidReferences(Object value){ if( value instanceof Path ) { try { final key = getWorkflowOutputKey(value) @@ -422,13 +422,13 @@ class CidObserver implements TraceObserver { } if( value instanceof Collection ) { - return value.collect { el -> convertPathsToCidReferences(el) } + return value.collect { el -> convertPathsToLidReferences(el) } } if( value instanceof Map ) { return value .findAll { k, v -> v != null } - .collectEntries { k, v -> Map.entry(k, convertPathsToCidReferences(v)) } + .collectEntries { k, v -> Map.entry(k, convertPathsToLidReferences(v)) } } return value } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinObserverFactory.groovy similarity index 79% rename from modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/LinObserverFactory.groovy index 1826f06b5c..3044389d73 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidObserverFactory.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinObserverFactory.groovy @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package nextflow.data.cid +package nextflow.lineage import groovy.transform.CompileStatic import nextflow.Session @@ -22,19 +22,19 @@ import nextflow.trace.TraceObserver import nextflow.trace.TraceObserverFactory /** - * Implements factory for {@link CidObserver} object + * Implements factory for {@link LinObserver} object * * @author Paolo Di Tommaso */ @CompileStatic -class CidObserverFactory implements TraceObserverFactory { +class LinObserverFactory implements TraceObserverFactory { @Override Collection create(Session session) { final result = new ArrayList(1) - final store = CidStoreFactory.getOrCreate(session) + final store = LinStoreFactory.getOrCreate(session) if( store ) - result.add( new CidObserver(session, store) ) + result.add( new LinObserver(session, store) ) return result } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy similarity index 69% rename from modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy index 100788b72c..86ea5cbbea 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidPropertyValidator.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy @@ -14,32 +14,32 @@ * limitations under the License. 
*/ -package nextflow.data.cid +package nextflow.lineage -import nextflow.data.cid.model.Annotation -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.TaskOutputs -import nextflow.data.cid.model.TaskRun -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutputs -import nextflow.data.cid.model.WorkflowRun +import nextflow.lineage.model.Annotation +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.TaskOutputs +import nextflow.lineage.model.TaskRun +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowRun import java.lang.reflect.Field /** - * Class to validate if the string refers to a property in the classes of te CID Metadata model. + * Class to validate if the string refers to a property in the classes of the Lineage Metadata model. 
* @author Jorge Ejarque */ -class CidPropertyValidator { +class LinPropertyValidator { - private static List CID_MODEL_CLASSES = [Workflow, WorkflowRun, WorkflowOutputs, TaskRun, TaskOutputs, DataOutput, DataPath, Parameter, Checksum, Annotation] + private static List LID_MODEL_CLASSES = [Workflow, WorkflowRun, WorkflowOutputs, TaskRun, TaskOutputs, DataOutput, DataPath, Parameter, Checksum, Annotation] private Set validProperties - CidPropertyValidator(){ + LinPropertyValidator(){ this.validProperties = new HashSet() - for( Class clazz: CID_MODEL_CLASSES) { + for( Class clazz: LID_MODEL_CLASSES) { for( Field field: clazz.declaredFields) { validProperties.add( field.name) } @@ -49,7 +49,7 @@ class CidPropertyValidator { void validate(Collection properties) { for(String property: properties) { if (!(property in this.validProperties)) { - throw new IllegalArgumentException("Property '$property' doesn't exist in the CID model") + throw new IllegalArgumentException("Property '$property' doesn't exist in the lineage model") } } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy similarity index 55% rename from modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy index f90cd4249f..ffef02b86b 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStore.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy @@ -14,51 +14,51 @@ * limitations under the License. 
*/ -package nextflow.data.cid +package nextflow.lineage import groovy.transform.CompileStatic -import nextflow.data.cid.serde.CidSerializable -import nextflow.data.config.DataConfig +import nextflow.lineage.serde.LinSerializable +import nextflow.lineage.config.LineageConfig /** - * Interface for the CID store + * Interface for the lineage store * * @author Paolo Di Tommaso */ @CompileStatic -interface CidStore extends Closeable { +interface LinStore extends Closeable { /** - * Open the CID store. - * @param config Configuration to open the CID store. + * Open the lineage store. + * @param config Configuration to open the lineage store. */ - CidStore open(DataConfig config) + LinStore open(LineageConfig config) /** - * Save a CID entry in the store for in a given key. + * Save a lineage entry in the store for in a given key. * @param key Entry key. * @param value Entry object. */ - void save(String key, CidSerializable value) + void save(String key, LinSerializable value) /** - * Load an entry for a given CID key. - * @param key CID key. + * Load an entry for a given Lineage ID key. + * @param key LID key. * @return entry value, or null if key does not exists */ - CidSerializable load(String key) + LinSerializable load(String key) /** - * Get the {@link CidHistoryLog} object associated to the CidStore. - * @return {@link CidHistoryLog} object + * Get the {@link LinHistoryLog} object associated to the lineage store. + * @return {@link LinHistoryLog} object */ - CidHistoryLog getHistoryLog() + LinHistoryLog getHistoryLog() /** - * Search for cid entries. + * Search for lineage entries. * @queryString Json-path like query string. (Only simple and nested field operators are supported(No array, wildcards,etc.) 
- * @return Map fulfilling the queryString + * @return Key-lineage entry pairs fulfilling the queryString */ - Map search(String queryString) + Map search(String queryString) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinStoreFactory.groovy similarity index 67% rename from modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/LinStoreFactory.groovy index 3b6befd1dd..b771766823 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidStoreFactory.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinStoreFactory.groovy @@ -14,51 +14,51 @@ * limitations under the License. */ -package nextflow.data.cid +package nextflow.lineage import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.Session -import nextflow.data.config.DataConfig +import nextflow.lineage.config.LineageConfig import nextflow.plugin.Plugins import nextflow.util.TestOnly import org.pf4j.ExtensionPoint /** - * Factory for {@link CidStore} objects + * Factory for {@link LinStore} objects * * @author Jorge Ejarque */ @Slf4j @CompileStatic -abstract class CidStoreFactory implements ExtensionPoint { +abstract class LinStoreFactory implements ExtensionPoint { - private static CidStore instance + private static LinStore instance private static boolean initialized - protected abstract boolean canOpen(DataConfig config) + protected abstract boolean canOpen(LineageConfig config) - protected abstract CidStore newInstance(DataConfig config) + protected abstract LinStore newInstance(LineageConfig config) - static CidStore create(DataConfig config){ + static LinStore create(LineageConfig config){ final factory = Plugins - .getPriorityExtensions(CidStoreFactory) + .getPriorityExtensions(LinStoreFactory) .find( f-> f.canOpen(config)) if( !factory ) - throw new IllegalStateException("Unable to find Nextflow CID store factory") - 
log.debug "Using Nextflow CID store factory: ${factory.getClass().getName()}" + throw new IllegalStateException("Unable to find Nextflow Lineage store factory") + log.debug "Using Nextflow Lineage store factory: ${factory.getClass().getName()}" return factory.newInstance(config) } - static CidStore getOrCreate(Session session) { + static LinStore getOrCreate(Session session) { if( instance || initialized ) return instance - synchronized (CidStoreFactory.class) { + synchronized (LinStoreFactory.class) { if( instance || initialized ) return instance initialized = true - final config = DataConfig.create(session) + final config = LineageConfig.create(session) if( !config.enabled ) return null return instance = create(config) @@ -67,7 +67,7 @@ abstract class CidStoreFactory implements ExtensionPoint { @TestOnly static void reset(){ - synchronized (CidStoreFactory.class) { + synchronized (LinStoreFactory.class) { instance = null initialized = false } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy similarity index 82% rename from modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy index f1476bf61e..35d32f5616 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/CidUtils.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -package nextflow.data.cid +package nextflow.lineage import groovy.transform.CompileStatic import groovy.util.logging.Slf4j -import nextflow.data.cid.fs.CidPath -import nextflow.data.cid.model.WorkflowRun -import nextflow.data.cid.model.TaskRun -import nextflow.data.cid.serde.CidEncoder -import nextflow.data.cid.serde.CidSerializable +import nextflow.lineage.fs.LinPath +import nextflow.lineage.model.WorkflowRun +import nextflow.lineage.model.TaskRun +import nextflow.lineage.serde.LinEncoder +import nextflow.lineage.serde.LinSerializable import nextflow.serde.gson.GsonEncoder import java.nio.file.attribute.FileTime @@ -30,29 +30,29 @@ import java.time.OffsetDateTime import java.time.ZoneOffset /** - * Utils class for CID. + * Utils class for Lineage IDs. * * @author Jorge Ejarque */ @Slf4j @CompileStatic -class CidUtils { +class LinUtils { private static final String[] EMPTY_ARRAY = new String[] {} /** - * Query a CID store. - * @param store CID store to query. + * Query a lineage store. + * @param store lineage store to query. * @param uri Query to perform in a URI-like format. - * Format 'cid://[?QueryString][#fragment]' where: - * - Key: Element where the query will be applied. '/' indicates query will be applied in all the elements of the CID store. - * - QueryString: all param-value pairs that the CID element should fulfill in a URI's query string format. + * Format 'lid://[?QueryString][#fragment]' where: + * - Key: Element where the query will be applied. '/' indicates query will be applied in all the elements of the lineage store. + * - QueryString: all param-value pairs that the lineage element should fulfill in a URI's query string format. * - Fragment: Element fragment to retrieve. * @return Collection of object fulfilling the query */ - static Collection query(CidStore store, URI uri) { + static Collection query(LinStore store, URI uri) { String key = uri.authority ? 
uri.authority + uri.path : uri.path - if (key == CidPath.SEPARATOR) { + if (key == LinPath.SEPARATOR) { return globalSearch(store, uri) } else { final parameters = uri.query ? parseQuery(uri.query) : null @@ -62,7 +62,7 @@ class CidUtils { } - private static Collection globalSearch(CidStore store, URI uri) { + private static Collection globalSearch(LinStore store, URI uri) { final results = store.search(uri.query).values() if (results && uri.fragment) { // If fragment is defined get the property of the object indicated by the fragment @@ -71,7 +71,7 @@ class CidUtils { return results } - private static List filterResults(Collection results, String fragment) { + private static List filterResults(Collection results, String fragment) { final filteredResults = [] results.forEach { final output = navigate(it, fragment) @@ -91,22 +91,22 @@ class CidUtils { if( !fragment ) return EMPTY_ARRAY final children = fragment.tokenize('.') - new CidPropertyValidator().validate(children) + new LinPropertyValidator().validate(children) return children as String[] } /** * Search for objects inside a description - * @param store CID store - * @param key CID key where to perform the search + * @param store lineage store + * @param key lineage key where to perform the search * @param params Parameter-value pairs to be evaluated in the key * @param children Sub-objects to evaluate and retrieve * @return List of object */ - protected static List searchPath(CidStore store, String key, Map params, String[] children = []) { + protected static List searchPath(LinStore store, String key, Map params, String[] children = []) { final object = store.load(key) if (!object) { - throw new FileNotFoundException("Cid object $key not found.") + throw new FileNotFoundException("Lineage object $key not found.") } final results = new LinkedList() if (children && children.size() > 0) { @@ -118,10 +118,10 @@ class CidUtils { return results } - private static void treatSubObject(CidStore store, String key, 
CidSerializable object, String[] children, Map params, LinkedList results) { + private static void treatSubObject(LinStore store, String key, LinSerializable object, String[] children, Map params, LinkedList results) { final output = getSubObject(store, key, object, children) if (!output) { - throw new FileNotFoundException("Cid object $key#${children.join('.')} not found.") + throw new FileNotFoundException("Lineage object $key#${children.join('.')} not found.") } treatObject(output, params, results) } @@ -130,13 +130,13 @@ class CidUtils { * Get a metadata sub-object. * If the requested sub-object is the workflow or task outputs, retrieves the outputs from the outputs description. * - * @param store CidStore to retrieve metadata objects. + * @param store Store to retrieve lineage metadata objects. * @param key Parent metadata key. * @param object Parent object. * @param children Array of string in indicating the properties to navigate to get the sub-object. * @return Sub-object or null in it does not exist. */ - static Object getSubObject(CidStore store, String key, CidSerializable object, String[] children) { + static Object getSubObject(LinStore store, String key, LinSerializable object, String[] children) { if( isSearchingOutputs(object, children) ) { // When asking for a Workflow or task output retrieve the outputs description final outputs = store.load("${key}#outputs") @@ -148,13 +148,13 @@ class CidUtils { } /** - * Check if the Cid pseudo path or query is for Task or Workflow outputs. + * Check if the Lid pseudo path or query is for Task or Workflow outputs. * - * @param object Parent Cid metadata object + * @param object Parent Lid metadata object * @param children Array of string in indicating the properties to navigate to get the sub-object. * @return return 'true' if the parent is a Task/Workflow run and the first element in children is 'outputs'. 
Otherwise 'false' */ - static boolean isSearchingOutputs(CidSerializable object, String[] children) { + static boolean isSearchingOutputs(LinSerializable object, String[] children) { return (object instanceof WorkflowRun || object instanceof TaskRun) && children && children[0] == 'outputs' } @@ -187,7 +187,7 @@ class CidUtils { final params = queryString.split('&').collectEntries { it.split('=').collect { URLDecoder.decode(it, 'UTF-8') } } as Map - new CidPropertyValidator().validateQueryParams(params) + new LinPropertyValidator().validateQueryParams(params) return params } @@ -229,8 +229,8 @@ class CidUtils { static Object navigate(Object obj, String path){ if (!obj) return null - // type has been replaced by class when evaluating CidSerializable objects - if (obj instanceof CidSerializable && path == 'type') + // type has been replaced by class when evaluating LidSerializable objects + if (obj instanceof LinSerializable && path == 'type') return obj.getClass()?.simpleName try{ return path.tokenize('.').inject(obj) { current, key -> @@ -299,22 +299,22 @@ class CidUtils { } /** - * Helper function to unify the encoding of outputs when querying and navigating the CID pseudoFS. - * Outputs can include CidSerializable objects, collections or parts of these objects. - * CidSerializable objects can be encoded with the CidEncoder, but collections or parts of + * Helper function to unify the encoding of outputs when querying and navigating the lineage pseudoFS. + * Outputs can include LinSerializable objects, collections or parts of these objects. + * LinSerializable objects can be encoded with the LinEncoder, but collections or parts of * these objects require to extend the GsonEncoder. 
* * @param output Output to encode * @return Output encoded as a JSON string */ static String encodeSearchOutputs(Object output, boolean prettyPrint) { - if (output instanceof CidSerializable){ - return new CidEncoder().withPrettyPrint(prettyPrint).encode(output) + if (output instanceof LinSerializable){ + return new LinEncoder().withPrettyPrint(prettyPrint).encode(output) } else { return new GsonEncoder() {} .withPrettyPrint(prettyPrint) .withSerializeNulls(true) - .withTypeAdapterFactory(CidEncoder.newCidTypeAdapterFactory()) + .withTypeAdapterFactory(LinEncoder.newLidTypeAdapterFactory()) .encode(output) } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy similarity index 75% rename from modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index 981e41105b..87b56d3f77 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/cli/CidCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -package nextflow.data.cid.cli +package nextflow.lineage.cli -import nextflow.data.cid.serde.CidEncoder +import nextflow.lineage.serde.LinEncoder -import static nextflow.data.cid.fs.CidPath.* +import static nextflow.lineage.fs.LinPath.* import java.nio.charset.StandardCharsets import java.nio.file.Path @@ -26,17 +26,17 @@ import java.nio.file.Path import groovy.transform.Canonical import groovy.transform.CompileStatic import nextflow.Session -import nextflow.cli.CmdCid +import nextflow.cli.CmdLineage import nextflow.config.ConfigMap import nextflow.dag.MermaidHtmlRenderer -import nextflow.data.cid.CidHistoryRecord -import nextflow.data.cid.CidStore -import nextflow.data.cid.CidStoreFactory -import nextflow.data.cid.CidUtils -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.TaskRun -import nextflow.data.cid.model.WorkflowRun +import nextflow.lineage.LinHistoryRecord +import nextflow.lineage.LinStore +import nextflow.lineage.LinStoreFactory +import nextflow.lineage.LinUtils +import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.TaskRun +import nextflow.lineage.model.WorkflowRun import nextflow.script.params.FileInParam import nextflow.ui.TableBuilder import org.eclipse.jgit.diff.DiffAlgorithm @@ -45,12 +45,12 @@ import org.eclipse.jgit.diff.RawText import org.eclipse.jgit.diff.RawTextComparator /** - * Implements CID command line operations + * Implements lineage command line operations * * @author Paolo Di Tommaso */ @CompileStatic -class CidCommandImpl implements CmdCid.CidCommand { +class LinCommandImpl implements CmdLineage.LinCommand { @Canonical static class Edge { @@ -62,58 +62,58 @@ class CidCommandImpl implements CmdCid.CidCommand { @Override void log(ConfigMap config) { final session = new Session(config) - final store = CidStoreFactory.getOrCreate(session) + final store = LinStoreFactory.getOrCreate(session) if (store) { 
printHistory(store) } else { - println "Error CID store not loaded. Check Nextflow configuration." + println "Error lineage store not loaded. Check Nextflow configuration." } } - private void printHistory(CidStore store) { + private void printHistory(LinStore store) { final records = store.historyLog?.records if( !records ) { - println("No workflow runs CIDs found.") + println("No workflow runs LIDs found.") return } def table = new TableBuilder(cellSeparator: '\t') .head('TIMESTAMP') .head('RUN NAME') .head('SESSION ID') - .head('RUN CID') - for (CidHistoryRecord record : records) { + .head('RUN LID') + for (LinHistoryRecord record : records) { table.append(record.toList()) } println table.toString() } @Override - void show(ConfigMap config, List args) { - if( !isCidUri(args[0]) ) - throw new Exception("Identifier is not a CID URL") - final store = CidStoreFactory.getOrCreate(new Session(config)) + void describe(ConfigMap config, List args) { + if( !isLidUri(args[0]) ) + throw new Exception("Identifier is not a LID URL") + final store = LinStoreFactory.getOrCreate(new Session(config)) if ( !store ) { - println "Error CID store not loaded. Check Nextflow configuration." + println "Error lineage store not loaded. Check Nextflow configuration." return } try { - def entries = CidUtils.query(store, new URI(args[0])) + def entries = LinUtils.query(store, new URI(args[0])) if( !entries ) { println "No entries found for ${args[0]}" return } entries = entries.size() == 1 ? entries[0] : entries - println CidUtils.encodeSearchOutputs(entries, true) + println LinUtils.encodeSearchOutputs(entries, true) } catch (Throwable e) { println "Error loading ${args[0]}. ${e.message}" } } @Override - void lineage(ConfigMap config, List args) { - final store = CidStoreFactory.getOrCreate(new Session(config)) + void render(ConfigMap config, List args) { + final store = LinStoreFactory.getOrCreate(new Session(config)) if( !store ) { - println "Error CID store not loaded. 
Check Nextflow configuration." + println "Error lineage store not loaded. Check Nextflow configuration." return } try { @@ -124,11 +124,11 @@ class CidCommandImpl implements CmdCid.CidCommand { } } - private void renderLineage(CidStore store, String dataCid, Path file) { + private void renderLineage(LinStore store, String dataLid, Path file) { def lines = [] as List lines << "flowchart BT".toString() final nodesToRender = new LinkedList() - nodesToRender.add(dataCid) + nodesToRender.add(dataLid) final edgesToRender = new LinkedList() while (!nodesToRender.isEmpty()) { final node = nodesToRender.removeFirst() @@ -142,26 +142,26 @@ class CidCommandImpl implements CmdCid.CidCommand { file.text = template.replace('REPLACE_WITH_NETWORK_DATA', lines.join('\n')) } - private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, CidStore store) { - if (!isCidUri(nodeToRender)) - throw new Exception("Identifier is not a CID URL") - final key = nodeToRender.substring(CID_PROT.size()) - final cidObject = store.load(key) - switch (cidObject.getClass()) { + private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, LinStore store) { + if (!isLidUri(nodeToRender)) + throw new Exception("Identifier is not a LID URL") + final key = nodeToRender.substring(LID_PROT.size()) + final lidObject = store.load(key) + switch (lidObject.getClass()) { case DataOutput: - processDataOutput(cidObject as DataOutput, lines, nodeToRender, nodes, edges) + processDataOutput(lidObject as DataOutput, lines, nodeToRender, nodes, edges) break; case WorkflowRun: - processWorkflowRun(cidObject as WorkflowRun, lines, nodeToRender, edges) + processWorkflowRun(lidObject as WorkflowRun, lines, nodeToRender, edges) break case TaskRun: - processTaskRun(cidObject as TaskRun, lines, nodeToRender, nodes, edges) + processTaskRun(lidObject as TaskRun, lines, nodeToRender, nodes, edges) break default: - throw new Exception("Unrecognized type reference 
${cidObject.getClass().getSimpleName()}") + throw new Exception("Unrecognized type reference ${lidObject.getClass().getSimpleName()}") } } @@ -189,12 +189,12 @@ class CidCommandImpl implements CmdCid.CidCommand { } } - private void processDataOutput(DataOutput cidObject, List lines, String nodeToRender, LinkedList nodes, LinkedList edges){ + private void processDataOutput(DataOutput lidObject, List lines, String nodeToRender, LinkedList nodes, LinkedList edges){ lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); - final source = cidObject.source + final source = lidObject.source if(! source ) return - if (isCidUri(source)) { + if (isLidUri(source)) { nodes.add(source) edges.add(new Edge(source, nodeToRender)) } else { @@ -215,7 +215,7 @@ class CidCommandImpl implements CmdCid.CidCommand { } if (value instanceof CharSequence) { final source = value.toString() - if (isCidUri(source)) { + if (isLidUri(source)) { nodes.add(source) edges.add(new Edge(source, nodeToRender)) return @@ -224,7 +224,7 @@ class CidCommandImpl implements CmdCid.CidCommand { if (value instanceof Map ) { if (value.path) { final path = value.path.toString() - if (isCidUri(path)) { + if (isLidUri(path)) { nodes.add(path) edges.add(new Edge(path, nodeToRender)) return @@ -243,28 +243,28 @@ class CidCommandImpl implements CmdCid.CidCommand { @Override void diff(ConfigMap config, List args) { - if (!isCidUri(args[0]) || !isCidUri(args[1])) - throw new Exception("Identifier is not a CID URL") + if (!isLidUri(args[0]) || !isLidUri(args[1])) + throw new Exception("Identifier is not a LID URL") - final store = CidStoreFactory.getOrCreate(new Session(config)) + final store = LinStoreFactory.getOrCreate(new Session(config)) if (!store) { - println "Error CID store not loaded. Check Nextflow configuration." + println "Error lineage store not loaded. Check Nextflow configuration." 
return } try { - final key1 = args[0].substring(CID_PROT.size()) + final key1 = args[0].substring(LID_PROT.size()) final entry1 = store.load(key1) if (!entry1) { println "No entry found for ${args[0]}." return } - final key2 = args[1].substring(CID_PROT.size()) + final key2 = args[1].substring(LID_PROT.size()) final entry2 = store.load(key2) if (!entry2) { println "No entry found for ${args[1]}." return } - final encoder = new CidEncoder().withPrettyPrint(true) + final encoder = new LinEncoder().withPrettyPrint(true) generateDiff(encoder.encode(entry1), key1, encoder.encode(entry2), key2) } catch (Throwable e) { println "Error generating diff between ${args[0]}: $e.message" @@ -302,13 +302,13 @@ class CidCommandImpl implements CmdCid.CidCommand { @Override void find(ConfigMap config, List args) { - final store = CidStoreFactory.getOrCreate(new Session(config)) + final store = LinStoreFactory.getOrCreate(new Session(config)) if (!store) { - println "Error CID store not loaded. Check Nextflow configuration." + println "Error lineage store not loaded. Check Nextflow configuration." return } try { - println CidUtils.encodeSearchOutputs(store.search(args[0]).keySet().collect {asUriString(it)}, true) + println LinUtils.encodeSearchOutputs(store.search(args[0]).keySet().collect {asUriString(it)}, true) } catch (Throwable e){ println "Error searching for ${args[0]}. ${e.message}" } diff --git a/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy b/modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy similarity index 69% rename from modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy index ece424be4e..73a86acbce 100644 --- a/modules/nf-cid/src/main/nextflow/data/config/DataConfig.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy @@ -14,42 +14,42 @@ * limitations under the License. 
*/ -package nextflow.data.config +package nextflow.lineage.config import groovy.transform.CompileStatic import nextflow.Global import nextflow.Session /** - * Model workflow data config + * Model workflow data lineage config * * @author Paolo Di Tommaso */ @CompileStatic -class DataConfig { +class LineageConfig { - final DataStoreOpts store + final LineageStoreOpts store final boolean enabled - DataConfig(Map opts) { - this.store = new DataStoreOpts(opts.store as Map ?: Map.of()) + LineageConfig(Map opts) { + this.store = new LineageStoreOpts(opts.store as Map ?: Map.of()) this.enabled = opts.enabled as boolean ?: false } static Map asMap() { - session?.config?.navigate('workflow.data') as Map ?: new HashMap() + session?.config?.navigate('workflow.lineage') as Map ?: new HashMap() } - static DataConfig create(Session session) { + static LineageConfig create(Session session) { if( session ) { - return new DataConfig( session.config.navigate('workflow.data') as Map ?: Map.of()) + return new LineageConfig( session.config.navigate('workflow.lineage') as Map ?: Map.of()) } else throw new IllegalStateException("Missing Nextflow session") } - static DataConfig create() { + static LineageConfig create() { create(getSession()) } diff --git a/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy b/modules/nf-lineage/src/main/nextflow/lineage/config/LineageStoreOpts.groovy similarity index 90% rename from modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/config/LineageStoreOpts.groovy index 09da0c6476..b5bb86512c 100644 --- a/modules/nf-cid/src/main/nextflow/data/config/DataStoreOpts.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/config/LineageStoreOpts.groovy @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package nextflow.data.config +package nextflow.lineage.config import groovy.transform.CompileStatic /** @@ -23,11 +23,11 @@ import groovy.transform.CompileStatic * @author Paolo Di Tommaso */ @CompileStatic -class DataStoreOpts { +class LineageStoreOpts { final String location - DataStoreOpts(Map opts) { + LineageStoreOpts(Map opts) { this.location = opts.location as String } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy similarity index 74% rename from modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy index 15ed3c7e7d..222385a917 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystem.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy @@ -14,10 +14,10 @@ * limitations under the License. */ -package nextflow.data.cid.fs +package nextflow.lineage.fs -import nextflow.data.cid.CidStore -import nextflow.data.cid.CidStoreFactory +import nextflow.lineage.LinStore +import nextflow.lineage.LinStoreFactory import java.nio.file.FileStore import java.nio.file.FileSystem @@ -28,44 +28,44 @@ import java.nio.file.attribute.UserPrincipalLookupService import java.nio.file.spi.FileSystemProvider import groovy.transform.CompileStatic -import nextflow.data.config.DataConfig +import nextflow.lineage.config.LineageConfig /** - * File system for CID Paths + * File system for LID Paths * * @author Jorge Ejarque */ @CompileStatic -class CidFileSystem extends FileSystem { +class LinFileSystem extends FileSystem { - private CidFileSystemProvider provider + private LinFileSystemProvider provider - private CidStore cidStore + private LinStore store /* * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 */ - protected CidFileSystem(){} + protected LinFileSystem(){} - 
CidFileSystem(CidFileSystemProvider provider, DataConfig config) { + LinFileSystem(LinFileSystemProvider provider, LineageConfig config) { this.provider = provider - this.cidStore = CidStoreFactory.create(config) + this.store = LinStoreFactory.create(config) } - CidStore getCidStore() { - return cidStore + LinStore getStore() { + return store } @Override boolean equals( Object other ) { if( this.class != other.class ) return false - final that = (CidFileSystem)other - this.provider == that.provider && this.cidStore == that.cidStore + final that = (LinFileSystem)other + this.provider == that.provider && this.store == that.store } @Override int hashCode() { - Objects.hash(provider,cidStore) + Objects.hash(provider,store) } @Override @@ -90,7 +90,7 @@ class CidFileSystem extends FileSystem { @Override String getSeparator() { - return CidPath.SEPARATOR + return LinPath.SEPARATOR } @Override @@ -110,12 +110,12 @@ class CidFileSystem extends FileSystem { @Override Path getPath(String first, String... more) { - final path = more ? CidPath.SEPARATOR + more.join(CidPath. SEPARATOR) : '' - return getPath(CidPath.asUri(CidPath.CID_PROT + first + path)) + final path = more ? LinPath.SEPARATOR + more.join(LinPath. 
SEPARATOR) : '' + return getPath(LinPath.asUri(LinPath.LID_PROT + first + path)) } Path getPath(URI uri){ - return new CidPath(this, uri) + return new LinPath(this, uri) } @Override diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy similarity index 72% rename from modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy index 7ae06216c1..5328d97912 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidFileSystemProvider.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy @@ -14,7 +14,7 @@ * limitations under the License. */ -package nextflow.data.cid.fs +package nextflow.lineage.fs import java.nio.ByteBuffer import java.nio.channels.NonWritableChannelException @@ -37,31 +37,31 @@ import java.nio.file.attribute.FileAttributeView import java.nio.file.spi.FileSystemProvider import groovy.transform.CompileStatic -import nextflow.data.config.DataConfig +import nextflow.lineage.config.LineageConfig /** - * File System Provider for CID Paths + * File System Provider for LID Paths * * @author Jorge Ejarque */ @CompileStatic -class CidFileSystemProvider extends FileSystemProvider { +class LinFileSystemProvider extends FileSystemProvider { - public static final String SCHEME = "cid" + public static final String SCHEME = "lid" - private CidFileSystem fileSystem + private LinFileSystem fileSystem @Override String getScheme() { return SCHEME } - protected CidPath toCidPath(Path path) { - if (path !instanceof CidPath) + protected LinPath toLinPath(Path path) { + if (path !instanceof LinPath) throw new ProviderMismatchException() - if (path instanceof CidMetadataPath) - return (CidMetadataPath) path - return (CidPath) path + if (path instanceof LinMetadataPath) + return (LinMetadataPath) path + return (LinPath) path } 
private void checkScheme(URI uri) { @@ -75,13 +75,13 @@ class CidFileSystemProvider extends FileSystemProvider { checkScheme(uri) if (!fileSystem) { //Overwrite default values with provided configuration - final defaultConfig = DataConfig.asMap() + final defaultConfig = LineageConfig.asMap() if (config) { for (Map.Entry e : config.entrySet()) { defaultConfig.put(e.key, e.value) } } - fileSystem = new CidFileSystem(this, new DataConfig(defaultConfig)) + fileSystem = new LinFileSystem(this, new LineageConfig(defaultConfig)) } return fileSystem } @@ -96,14 +96,14 @@ class CidFileSystemProvider extends FileSystemProvider { synchronized FileSystem getFileSystemOrCreate(URI uri) { checkScheme(uri) if (!fileSystem) { - fileSystem = (CidFileSystem) newFileSystem(uri, DataConfig.asMap()) + fileSystem = (LinFileSystem) newFileSystem(uri, LineageConfig.asMap()) } return fileSystem } @Override - CidPath getPath(URI uri) { - return (CidPath) ((CidFileSystem) getFileSystemOrCreate(uri)).getPath(uri) + LinPath getPath(URI uri) { + return (LinPath) ((LinFileSystem) getFileSystemOrCreate(uri)).getPath(uri) } @Override @@ -113,31 +113,31 @@ class CidFileSystemProvider extends FileSystemProvider { @Override InputStream newInputStream(Path path, OpenOption... options) throws IOException { - final cid = toCidPath(path) - if (cid instanceof CidMetadataPath) - return (cid as CidMetadataPath).newInputStream() - return newInputStream0(cid, options) + final lid = toLinPath(path) + if (lid instanceof LinMetadataPath) + return (lid as LinMetadataPath).newInputStream() + return newInputStream0(lid, options) } - private static InputStream newInputStream0(CidPath cid, OpenOption... options) throws IOException { - final realPath = cid.getTargetOrMetadataPath() - if (realPath instanceof CidMetadataPath) - return (realPath as CidMetadataPath).newInputStream() + private static InputStream newInputStream0(LinPath lid, OpenOption... 
options) throws IOException { + final realPath = lid.getTargetOrMetadataPath() + if (realPath instanceof LinMetadataPath) + return (realPath as LinMetadataPath).newInputStream() return realPath.fileSystem.provider().newInputStream(realPath, options) } @Override SeekableByteChannel newByteChannel(Path path, Set options, FileAttribute... attrs) throws IOException { - final cid = toCidPath(path) + final lid = toLinPath(path) validateOptions(options) - return newByteChannel0(cid, options, attrs) + return newByteChannel0(lid, options, attrs) } - private class CidPathSeekableByteChannel implements SeekableByteChannel { + private class LinPathSeekableByteChannel implements SeekableByteChannel { SeekableByteChannel channel - CidPathSeekableByteChannel(SeekableByteChannel channel) { + LinPathSeekableByteChannel(SeekableByteChannel channel) { this.channel = channel } @@ -194,33 +194,33 @@ class CidFileSystemProvider extends FileSystemProvider { } - private SeekableByteChannel newByteChannel0(CidPath cid, Set options, FileAttribute... attrs) { - if (cid instanceof CidMetadataPath) { - return (cid as CidMetadataPath).newSeekableByteChannel() + private SeekableByteChannel newByteChannel0(LinPath lid, Set options, FileAttribute... 
attrs) { + if (lid instanceof LinMetadataPath) { + return (lid as LinMetadataPath).newSeekableByteChannel() } - final realPath = cid.getTargetOrMetadataPath() - if (realPath instanceof CidMetadataPath) { - return (realPath as CidMetadataPath).newSeekableByteChannel() + final realPath = lid.getTargetOrMetadataPath() + if (realPath instanceof LinMetadataPath) { + return (realPath as LinMetadataPath).newSeekableByteChannel() } else { SeekableByteChannel channel = realPath.fileSystem.provider().newByteChannel(realPath, options, attrs) - return new CidPathSeekableByteChannel(channel) + return new LinPathSeekableByteChannel(channel) } } @Override DirectoryStream newDirectoryStream(Path path, DirectoryStream.Filter filter) throws IOException { - final cid = toCidPath(path) - final real = cid.getTargetPath() + final lid = toLinPath(path) + final real = lid.getTargetPath() final stream = real .getFileSystem() .provider() - .newDirectoryStream(real, new CidFilter(fileSystem)) + .newDirectoryStream(real, new LidFilter(fileSystem)) return new DirectoryStream() { @Override Iterator iterator() { - return new CidIterator(fileSystem, stream.iterator(), cid, real) + return new LidIterator(fileSystem, stream.iterator(), lid, real) } @Override @@ -230,11 +230,11 @@ class CidFileSystemProvider extends FileSystemProvider { } } - private class CidFilter implements DirectoryStream.Filter { + private class LidFilter implements DirectoryStream.Filter { - private final CidFileSystem fs + private final LinFileSystem fs - CidFilter(CidFileSystem fs) { + LidFilter(LinFileSystem fs) { this.fs = fs } @@ -244,26 +244,26 @@ class CidFileSystemProvider extends FileSystemProvider { } } - private static CidPath fromRealToCidPath(Path toConvert, Path realBase, CidPath cidBase) { + private static LinPath fromRealToLinPath(Path toConvert, Path realBase, LinPath lidBase) { if (toConvert.isAbsolute()) { if (toConvert.class != realBase.class) { throw new ProviderMismatchException() } final relative = 
realBase.relativize(toConvert) - return (CidPath) cidBase.resolve(relative.toString()) + return (LinPath) lidBase.resolve(relative.toString()) } else { - return (CidPath) cidBase.resolve(toConvert.toString()) + return (LinPath) lidBase.resolve(toConvert.toString()) } } - private static class CidIterator implements Iterator { + private static class LidIterator implements Iterator { - private final CidFileSystem fs + private final LinFileSystem fs private final Iterator target - private final CidPath parent + private final LinPath parent private final Path parentReal - CidIterator(CidFileSystem fs, Iterator itr, CidPath parent, Path real) { + LidIterator(LinFileSystem fs, Iterator itr, LinPath parent, Path real) { this.fs = fs this.target = itr this.parent = parent @@ -276,9 +276,9 @@ class CidFileSystemProvider extends FileSystemProvider { } @Override - CidPath next() { + LinPath next() { final path = target.next() - return path ? fromRealToCidPath(path, parentReal, parent) : null + return path ? fromRealToLinPath(path, parentReal, parent) : null } } @@ -309,7 +309,7 @@ class CidFileSystemProvider extends FileSystemProvider { @Override boolean isHidden(Path path) throws IOException { - return toCidPath(path).getTargetOrMetadataPath().isHidden() + return toLinPath(path).getTargetOrMetadataPath().isHidden() } @Override @@ -320,15 +320,15 @@ class CidFileSystemProvider extends FileSystemProvider { @Override void checkAccess(Path path, AccessMode... modes) throws IOException { validateAccessModes(modes) - final cid = toCidPath(path) - if (cid instanceof CidMetadataPath) + final lid = toLinPath(path) + if (lid instanceof LinMetadataPath) return - checkAccess0(cid, modes) + checkAccess0(lid, modes) } - private void checkAccess0(CidPath cid, AccessMode... modes) { - final real = cid.getTargetOrMetadataPath() - if (real instanceof CidMetadataPath) + private void checkAccess0(LinPath lid, AccessMode... 
modes) { + final real = lid.getTargetOrMetadataPath() + if (real instanceof LinMetadataPath) return real.fileSystem.provider().checkAccess(real, modes) } @@ -349,16 +349,16 @@ class CidFileSystemProvider extends FileSystemProvider { @Override A readAttributes(Path path, Class type, LinkOption... options) throws IOException { - final cid = toCidPath(path) - if (cid instanceof CidMetadataPath) - return (cid as CidMetadataPath).readAttributes(type) - readAttributes0(cid, type, options) + final lid = toLinPath(path) + if (lid instanceof LinMetadataPath) + return (lid as LinMetadataPath).readAttributes(type) + readAttributes0(lid, type, options) } - private A readAttributes0(CidPath cid, Class type, LinkOption... options) throws IOException { - final real = cid.getTargetOrMetadataPath() - if (real instanceof CidMetadataPath) - return (real as CidMetadataPath).readAttributes(type) + private A readAttributes0(LinPath lid, Class type, LinkOption... options) throws IOException { + final real = lid.getTargetOrMetadataPath() + if (real instanceof LinMetadataPath) + return (real as LinMetadataPath).readAttributes(type) return real.fileSystem.provider().readAttributes(real, type, options) } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinMetadataPath.groovy similarity index 89% rename from modules/nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/fs/LinMetadataPath.groovy index 0801b2936c..9922fa9cee 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidMetadataPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinMetadataPath.groovy @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package nextflow.data.cid.fs +package nextflow.lineage.fs import groovy.transform.CompileStatic @@ -28,11 +28,11 @@ import java.nio.file.attribute.FileTime * @author Jorge Ejarque */ @CompileStatic -class CidMetadataPath extends CidPath { +class LinMetadataPath extends LinPath { private byte[] results private FileTime creationTime - CidMetadataPath(String resultsObject, FileTime creationTime, CidFileSystem fs, String path, String[] childs) { + LinMetadataPath(String resultsObject, FileTime creationTime, LinFileSystem fs, String path, String[] childs) { super(fs, "${path}${childs ? '#'+ childs.join('.') : ''}") this.results = resultsObject.getBytes("UTF-8") this.creationTime = creationTime @@ -43,7 +43,7 @@ class CidMetadataPath extends CidPath { } SeekableByteChannel newSeekableByteChannel(){ - return new ResultsSeekableByteChannel(results) + return new LinMetadataSeekableByteChannel(results) } A readAttributes(Class type){ diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinMetadataSeekableByteChannel.groovy similarity index 93% rename from modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/fs/LinMetadataSeekableByteChannel.groovy index 3b3be03efb..6a78cd9b1e 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/ResultsSeekableByteChannel.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinMetadataSeekableByteChannel.groovy @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package nextflow.data.cid.fs +package nextflow.lineage.fs import groovy.transform.CompileStatic @@ -29,11 +29,11 @@ import java.nio.channels.SeekableByteChannel * @author Jorge Ejarque */ @CompileStatic -class ResultsSeekableByteChannel implements SeekableByteChannel { +class LinMetadataSeekableByteChannel implements SeekableByteChannel { private final ByteBuffer buffer private boolean open - ResultsSeekableByteChannel(byte[] bytes){ + LinMetadataSeekableByteChannel(byte[] bytes){ this.open = true this.buffer = ByteBuffer.wrap(bytes) } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy similarity index 74% rename from modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index 0fba3cf024..e0d7019455 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -14,18 +14,18 @@ * limitations under the License. 
*/ -package nextflow.data.cid.fs +package nextflow.lineage.fs import groovy.util.logging.Slf4j -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.serde.CidSerializable +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataOutput +import nextflow.lineage.serde.LinSerializable import nextflow.file.LogicalDataPath import nextflow.util.CacheHelper import nextflow.util.TestOnly -import static nextflow.data.cid.fs.CidFileSystemProvider.* -import static nextflow.data.cid.CidUtils.* +import static LinFileSystemProvider.* +import static nextflow.lineage.LinUtils.* import java.nio.file.FileSystem import java.nio.file.LinkOption @@ -40,23 +40,23 @@ import groovy.transform.CompileStatic import nextflow.file.FileHelper /** - * CID file system path + * LID file system path * * @author Jorge Ejarque */ @Slf4j @CompileStatic -class CidPath implements Path, LogicalDataPath { +class LinPath implements Path, LogicalDataPath { static public final List SUPPORTED_CHECKSUM_ALGORITHMS=["nextflow"] static public final String SEPARATOR = '/' - public static final String CID_PROT = "${SCHEME}://" + public static final String LID_PROT = "${SCHEME}://" static private final String[] EMPTY = new String[] {} - private CidFileSystem fileSystem + private LinFileSystem fileSystem - // String with the cid file path + // String with the lineage file path private String filePath private String query @@ -66,11 +66,11 @@ class CidPath implements Path, LogicalDataPath { /* * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 */ - protected CidPath(){} + protected LinPath(){} - CidPath(CidFileSystem fs, URI uri) { + LinPath(LinFileSystem fs, URI uri) { if( uri.scheme != SCHEME ) { - throw new IllegalArgumentException("Invalid CID URI - scheme is different for $SCHEME") + throw new IllegalArgumentException("Invalid LID URI - scheme is different for $SCHEME") } this.fileSystem = 
fs this.query = uri.query @@ -78,7 +78,7 @@ class CidPath implements Path, LogicalDataPath { this.filePath = resolve0( fs, norm0("${uri.authority?:''}${uri.path}") ) } - protected CidPath( String query, String fragment, String filepath, CidFileSystem fs){ + protected LinPath(String query, String fragment, String filepath, LinFileSystem fs){ this.fileSystem = fs this.query = query this.fragment = fragment @@ -86,20 +86,20 @@ class CidPath implements Path, LogicalDataPath { } - CidPath(CidFileSystem fs, String path) { - this( fs, asUri( CID_PROT + norm0(path)) ) + LinPath(LinFileSystem fs, String path) { + this( fs, asUri( LID_PROT + norm0(path)) ) } - CidPath(CidFileSystem fs, String first, String[] more) { - this( fs, asUri( CID_PROT + buildPath(first, more) ) ) + LinPath(LinFileSystem fs, String first, String[] more) { + this( fs, asUri( LID_PROT + buildPath(first, more) ) ) } static String asUriString(String first, String... more) { - return CID_PROT + buildPath(first, more) + return LID_PROT + buildPath(first, more) } - static boolean isCidUri(String path) { - return path && path.startsWith(CID_PROT) + static boolean isLidUri(String path) { + return path && path.startsWith(LID_PROT) } private static String buildPath(String first, String[] more){ @@ -111,11 +111,11 @@ class CidPath implements Path, LogicalDataPath { return first } - protected static void validateDataOutput(DataOutput cidObject) { - final hashedPath = FileHelper.toCanonicalPath(cidObject.path as String) + protected static void validateDataOutput(DataOutput lidObject) { + final hashedPath = FileHelper.toCanonicalPath(lidObject.path as String) if( !hashedPath.exists() ) - throw new FileNotFoundException("Target path $cidObject.path does not exists.") - validateChecksum(cidObject.checksum, hashedPath) + throw new FileNotFoundException("Target path $lidObject.path does not exists.") + validateChecksum(lidObject.checksum, hashedPath) } protected static void validateChecksum(Checksum checksum, Path 
hashedPath) { @@ -142,24 +142,25 @@ class CidPath implements Path, LogicalDataPath { /** - * Finds the target path of a CID path. - * @param fs CID fileSystem associated to the CidPath to find - * @param filePath Path associated to the CidPath to find - * @param resultsAsPath True to return metadata descriptions as CidMetadataPath + + * Finds the target path of a LinPath. + * @param fs LinFileSystem associated to the LinPath to find + * @param filePath Path associated to the LinPath to find + * @param resultsAsPath True to return metadata descriptions as LinMetadataPath * @param children Sub-object/path inside the description - * @return Path or CidMetadataPath associated to the CidPath + * @return Path or LinMetadataPath associated to the LinPath * @throws Exception - * IllegalArgumentException if the filepath, filesystem or its CidStore are null. - * FileNotFoundException if the filePath or children are not found in the CidStore. + * IllegalArgumentException if the filepath, filesystem or its LinStore are null. + * FileNotFoundException if the filePath or children are not found in the LinStore. */ - protected static Path findTarget(CidFileSystem fs, String filePath, boolean resultsAsPath, String[] children=[]) throws Exception { + protected static Path findTarget(LinFileSystem fs, String filePath, boolean resultsAsPath, String[] children=[]) throws Exception { if( !fs ) - throw new IllegalArgumentException("Cannot get target path for a relative CidPath") + throw new IllegalArgumentException("Cannot get target path for a relative LinPath") if( filePath.isEmpty() || filePath == SEPARATOR ) - throw new IllegalArgumentException("Cannot get target path for an empty CidPath") - final store = fs.getCidStore() + throw new IllegalArgumentException("Cannot get target path for an empty LinPath") + final store = fs.getStore() if( !store ) - throw new IllegalArgumentException("CID store not found. Check Nextflow configuration.") + throw new Exception("Lineage store not found. 
Check Nextflow configuration.") final object = store.load(filePath) if ( object ){ if( object instanceof DataOutput ) { @@ -183,55 +184,55 @@ class CidPath implements Path, LogicalDataPath { throw new FileNotFoundException("Target path '$filePath' does not exists.") } - protected static Path getMetadataAsTargetPath(CidSerializable results, CidFileSystem fs, String filePath, String[] children){ + protected static Path getMetadataAsTargetPath(LinSerializable results, LinFileSystem fs, String filePath, String[] children){ if( !results ) { throw new FileNotFoundException("Target path '$filePath' does not exist.") } if (children && children.size() > 0) { return getSubObjectAsPath(fs, filePath, results, children) } else { - return generateCidMetadataPath(fs, filePath, results, children) + return generateLinMetadataPath(fs, filePath, results, children) } } /** - * Get a metadata sub-object as CidMetadataPath. + * Get a metadata sub-object as LinMetadataPath. * If the requested sub-object is the workflow or task outputs, retrieves the outputs from the outputs description. * - * @param fs CidFilesystem for the te. + * @param fs LinFilesystem for the te. * @param key Parent metadata key. * @param object Parent object. * @param children Array of string in indicating the properties to navigate to get the sub-object. - * @return CidMetadataPath or null in it does not exist. + * @return LinMetadataPath or null in it does not exist. 
*/ - static CidMetadataPath getSubObjectAsPath(CidFileSystem fs, String key, CidSerializable object, String[] children) { + static LinMetadataPath getSubObjectAsPath(LinFileSystem fs, String key, LinSerializable object, String[] children) { if( isSearchingOutputs(object, children) ) { // When asking for a Workflow or task output retrieve the outputs description - final outputs = fs.cidStore.load("${key}/outputs") + final outputs = fs.store.load("${key}/outputs") if( !outputs ) { throw new FileNotFoundException("Target path '$key#outputs' does not exist.") } - return generateCidMetadataPath(fs, key, outputs, children) + return generateLinMetadataPath(fs, key, outputs, children) } else { - return generateCidMetadataPath(fs, key, object, children) + return generateLinMetadataPath(fs, key, object, children) } } - private static CidMetadataPath generateCidMetadataPath(CidFileSystem fs, String key, Object object, String[] children){ + private static LinMetadataPath generateLinMetadataPath(LinFileSystem fs, String key, Object object, String[] children){ def creationTime = toFileTime(navigate(object, 'createdAt') as OffsetDateTime ?: OffsetDateTime.now()) final output = children ? 
navigate(object, children.join('.')) : object if( !output ) { throw new FileNotFoundException("Target path '$key#${children.join('.')}' does not exist.") } - return new CidMetadataPath(encodeSearchOutputs(output, true), creationTime, fs, key, children) + return new LinMetadataPath(encodeSearchOutputs(output, true), creationTime, fs, key, children) } private static Path getTargetPathFromOutput(DataOutput object, String[] children) { - final cidObject = object as DataOutput + final lidObject = object as DataOutput // return the real path stored in the metadata - validateDataOutput(cidObject) - def realPath = FileHelper.toCanonicalPath(cidObject.path as String) + validateDataOutput(lidObject) + def realPath = FileHelper.toCanonicalPath(lidObject.path as String) if (children && children.size() > 0) realPath = realPath.resolve(children.join(SEPARATOR)) if (!realPath.exists()) @@ -239,11 +240,11 @@ class CidPath implements Path, LogicalDataPath { return realPath } - private static boolean isEmptyBase(CidFileSystem fs, String base){ + private static boolean isEmptyBase(LinFileSystem fs, String base){ return !base || base == SEPARATOR || (fs && base == "..") } - private static String resolve0(CidFileSystem fs, String base, String[] more) { + private static String resolve0(LinFileSystem fs, String base, String[] more) { if( isEmptyBase(fs,base) ) { return resolveEmptyPathCase(fs, more as List) } @@ -256,7 +257,7 @@ class CidPath implements Path, LogicalDataPath { return more ? 
result.resolve(more.join(SEPARATOR)).toString() : result.toString() } - private static String resolveEmptyPathCase(CidFileSystem fs, List more ){ + private static String resolveEmptyPathCase(LinFileSystem fs, List more ){ switch(more.size()) { case 0: return "/" @@ -299,13 +300,13 @@ class CidPath implements Path, LogicalDataPath { @Override Path getRoot() { - return new CidPath(fileSystem, SEPARATOR) + return new LinPath(fileSystem, SEPARATOR) } @Override Path getFileName() { final result = Path.of(filePath).getFileName()?.toString() - return result ? new CidPath( fragment, query, result, null) : null + return result ? new LinPath( fragment, query, result, null) : null } @Override @@ -314,7 +315,7 @@ class CidPath implements Path, LogicalDataPath { if( c>1 ) return subpath(0,c-1) if( c==1 ) - return new CidPath(fileSystem,SEPARATOR) + return new LinPath(fileSystem,SEPARATOR) return null } @@ -329,9 +330,9 @@ class CidPath implements Path, LogicalDataPath { throw new IllegalArgumentException("Path name index cannot be less than zero - offending value: $index") final path = Path.of(filePath) if (index == path.nameCount - 1){ - return new CidPath( fragment, query, path.getName(index).toString(), null) + return new LinPath( fragment, query, path.getName(index).toString(), null) } - return new CidPath(index==0 ? fileSystem : null, path.getName(index).toString()) + return new LinPath(index==0 ? fileSystem : null, path.getName(index).toString()) } @Override @@ -339,12 +340,12 @@ class CidPath implements Path, LogicalDataPath { if( beginIndex<0 ) throw new IllegalArgumentException("subpath begin index cannot be less than zero - offending value: $beginIndex") final path = Path.of(filePath) - return new CidPath(beginIndex==0 ? fileSystem : null, path.subpath(beginIndex, endIndex).toString()) + return new LinPath(beginIndex==0 ? 
fileSystem : null, path.subpath(beginIndex, endIndex).toString()) } @Override Path normalize() { - return new CidPath(fileSystem, Path.of(filePath).normalize().toString()) + return new LinPath(fileSystem, Path.of(filePath).normalize().toString()) } @Override @@ -369,10 +370,10 @@ class CidPath implements Path, LogicalDataPath { @Override Path resolve(Path other) { - if( CidPath.class != other.class ) + if( LinPath.class != other.class ) throw new ProviderMismatchException() - final that = (CidPath)other + final that = (LinPath)other if( that.fileSystem && this.fileSystem != that.fileSystem ) return other @@ -380,7 +381,7 @@ class CidPath implements Path, LogicalDataPath { return that } else { final newPath = Path.of(filePath).resolve(that.toString()) - return new CidPath(that.query, that.fragment, newPath.toString(), fileSystem) + return new LinPath(that.query, that.fragment, newPath.toString(), fileSystem) } } @@ -390,8 +391,8 @@ class CidPath implements Path, LogicalDataPath { return this final scheme = FileHelper.getUrlProtocol(path) if( !scheme ) { - // consider the path as a cid relative path - return resolve(new CidPath(null,path)) + // consider the path as a lid relative path + return resolve(new LinPath(null,path)) } if( scheme != SCHEME ) { throw new ProviderMismatchException() @@ -402,21 +403,21 @@ class CidPath implements Path, LogicalDataPath { @Override Path relativize(Path other) { - if( CidPath.class != other.class ) { + if( LinPath.class != other.class ) { throw new ProviderMismatchException() } - CidPath cidOther = other as CidPath - if( this.isAbsolute() != cidOther.isAbsolute() ) + LinPath lidOther = other as LinPath + if( this.isAbsolute() != lidOther.isAbsolute() ) throw new IllegalArgumentException("Cannot compare absolute with relative paths"); def path if( this.isAbsolute() ) { // Compare 'filePath' as absolute paths adding the root separator - path = Path.of(SEPARATOR + filePath).relativize(Path.of(SEPARATOR + cidOther.filePath)) + path = 
Path.of(SEPARATOR + filePath).relativize(Path.of(SEPARATOR + lidOther.filePath)) } else { // Compare 'filePath' as relative paths - path = Path.of(filePath).relativize(Path.of(cidOther.filePath)) + path = Path.of(filePath).relativize(Path.of(lidOther.filePath)) } - return new CidPath(cidOther.query, cidOther.fragment, path.getNameCount()>0 ? path.toString() : SEPARATOR, null) + return new LinPath(lidOther.query, lidOther.fragment, path.getNameCount()>0 ? path.toString() : SEPARATOR, null) } @Override @@ -445,7 +446,7 @@ class CidPath implements Path, LogicalDataPath { * Get the path associated to a DataOutput metadata. * * @return Path associated to a DataOutput - * @throws FileNotFoundException if the metadata associated to the CidPath does not exist or its type is not a DataOutput. + * @throws FileNotFoundException if the metadata associated to the LinPath does not exist or its type is not a DataOutput. */ protected Path getTargetPath() { return findTarget(fileSystem, filePath, false, parseChildrenFormFragment(fragment)) @@ -453,8 +454,8 @@ class CidPath implements Path, LogicalDataPath { /** * Get the path associated to any metadata object. - * @return Path associated to a DataOutput or CidMetadataFile with the metadata object for other types. - * @throws FileNotFoundException if the metadata associated to the CidPath does not exist. + * @return Path associated to a DataOutput or LinMetadataFile with the metadata object for other types. + * @throws FileNotFoundException if the metadata associated to the LinPath does not exist. 
*/ protected Path getTargetOrMetadataPath(){ return findTarget(fileSystem, filePath, true, parseChildrenFormFragment(fragment)) @@ -462,12 +463,12 @@ class CidPath implements Path, LogicalDataPath { @Override File toFile() throws IOException { - throw new UnsupportedOperationException("toFile not supported by CidPath") + throw new UnsupportedOperationException("toFile not supported by LinPath") } @Override WatchKey register(WatchService watcher, WatchEvent.Kind[] events, WatchEvent.Modifier... modifiers) throws IOException { - throw new UnsupportedOperationException("Register not supported by CidPath") + throw new UnsupportedOperationException("Register not supported by LinPath") } @Override @@ -477,10 +478,10 @@ class CidPath implements Path, LogicalDataPath { @Override boolean equals(Object other) { - if( CidPath.class != other.class ) { + if( LinPath.class != other.class ) { return false } - final that = (CidPath)other + final that = (LinPath)other return this.fileSystem == that.fileSystem && this.filePath.equals(that.filePath) } @@ -495,12 +496,12 @@ class CidPath implements Path, LogicalDataPath { static URI asUri(String path) { if (!path) throw new IllegalArgumentException("Missing 'path' argument") - if (!path.startsWith(CID_PROT)) - throw new IllegalArgumentException("Invalid CID file system path URI - it must start with '${CID_PROT}' prefix - offendinf value: $path") - if (path.startsWith(CID_PROT + SEPARATOR) && path.length() > 7) - throw new IllegalArgumentException("Invalid CID file system path URI - make sure the schema prefix does not container more than two slash characters - offending value: $path") - if (path == CID_PROT) //Empty path case - return new URI("cid:///") + if (!path.startsWith(LID_PROT)) + throw new IllegalArgumentException("Invalid LID file system path URI - it must start with '${LID_PROT}' prefix - offendinf value: $path") + if (path.startsWith(LID_PROT + SEPARATOR) && path.length() > 7) + throw new IllegalArgumentException("Invalid 
LID file system path URI - make sure the schema prefix does not container more than two slash characters - offending value: $path") + if (path == LID_PROT) //Empty path case + return new URI("lid:///") return new URI(path) } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPathFactory.groovy similarity index 68% rename from modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/fs/LinPathFactory.groovy index 567f047094..1c2c7350e1 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/fs/CidPathFactory.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPathFactory.groovy @@ -14,32 +14,32 @@ * limitations under the License. */ -package nextflow.data.cid.fs +package nextflow.lineage.fs -import static nextflow.data.cid.fs.CidPath.* +import static LinPath.* import java.nio.file.Path import groovy.transform.CompileStatic -import nextflow.data.config.DataConfig +import nextflow.lineage.config.LineageConfig import nextflow.file.FileHelper import nextflow.file.FileSystemPathFactory /** - * Implements a {@link FileSystemPathFactory} for CID file system + * Implements a {@link FileSystemPathFactory} for LID file system * * @author Jorge Ejarque */ @CompileStatic -class CidPathFactory extends FileSystemPathFactory { +class LinPathFactory extends FileSystemPathFactory { @Override protected Path parseUri(String uri) { - return isCidUri(uri) ? create(uri) : null + return isLidUri(uri) ? create(uri) : null } @Override protected String toUriString(Path path) { - return path instanceof CidPath ? ((CidPath)path).toUriString() : null + return path instanceof LinPath ? 
((LinPath)path).toUriString() : null } @Override @@ -52,8 +52,8 @@ class CidPathFactory extends FileSystemPathFactory { return null } - static CidPath create(String path) { - final uri = CidPath.asUri(path) - return (CidPath) FileHelper.getOrCreateFileSystemFor(uri, DataConfig.asMap()).provider().getPath(uri) + static LinPath create(String path) { + final uri = LinPath.asUri(path) + return (LinPath) FileHelper.getOrCreateFileSystemFor(uri, LineageConfig.asMap()).provider().getPath(uri) } } diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Annotation.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/Annotation.groovy similarity index 96% rename from modules/nf-cid/src/main/nextflow/data/cid/model/Annotation.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/Annotation.groovy index c6f3db6917..064bc4e64e 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Annotation.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/Annotation.groovy @@ -15,7 +15,7 @@ * */ -package nextflow.data.cid.model +package nextflow.lineage.model import groovy.transform.Canonical import groovy.transform.CompileStatic diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/Checksum.groovy similarity index 97% rename from modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/Checksum.groovy index aa653ba9f5..a1060edb63 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Checksum.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/Checksum.groovy @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package nextflow.data.cid.model +package nextflow.lineage.model import java.nio.file.Path diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/DataOutput.groovy similarity index 93% rename from modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/DataOutput.groovy index 1a027b5ebd..41edf3c501 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/DataOutput.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/DataOutput.groovy @@ -14,11 +14,11 @@ * limitations under the License. */ -package nextflow.data.cid.model +package nextflow.lineage.model import groovy.transform.Canonical import groovy.transform.CompileStatic -import nextflow.data.cid.serde.CidSerializable +import nextflow.lineage.serde.LinSerializable import java.time.OffsetDateTime @@ -29,7 +29,7 @@ import java.time.OffsetDateTime */ @Canonical @CompileStatic -class DataOutput implements CidSerializable { +class DataOutput implements LinSerializable { /** * Real path of the output data. */ diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/DataPath.groovy similarity index 96% rename from modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/DataPath.groovy index 8688d4d1b4..526c0dc309 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/DataPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/DataPath.groovy @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package nextflow.data.cid.model +package nextflow.lineage.model import groovy.transform.Canonical import groovy.transform.CompileStatic diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Parameter.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/Parameter.groovy similarity index 96% rename from modules/nf-cid/src/main/nextflow/data/cid/model/Parameter.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/Parameter.groovy index 29210442ec..3e3c00343f 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Parameter.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/Parameter.groovy @@ -14,7 +14,7 @@ * limitations under the License. */ -package nextflow.data.cid.model +package nextflow.lineage.model import groovy.transform.Canonical import groovy.transform.CompileStatic diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/TaskOutputs.groovy similarity index 89% rename from modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/TaskOutputs.groovy index 929ecc0345..c995044ea8 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskOutputs.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/TaskOutputs.groovy @@ -14,13 +14,12 @@ * limitations under the License. */ -package nextflow.data.cid.model +package nextflow.lineage.model import groovy.transform.Canonical import groovy.transform.CompileStatic -import nextflow.data.cid.serde.CidSerializable +import nextflow.lineage.serde.LinSerializable -import java.time.Instant import java.time.OffsetDateTime /** @@ -30,7 +29,7 @@ import java.time.OffsetDateTime */ @Canonical @CompileStatic -class TaskOutputs implements CidSerializable { +class TaskOutputs implements LinSerializable { /** * Reference to the task that generated the data. 
*/ diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/TaskRun.groovy similarity index 93% rename from modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/TaskRun.groovy index 04f722cc33..ec9c35af0c 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/TaskRun.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/TaskRun.groovy @@ -14,11 +14,11 @@ * limitations under the License. */ -package nextflow.data.cid.model +package nextflow.lineage.model import groovy.transform.Canonical import groovy.transform.CompileStatic -import nextflow.data.cid.serde.CidSerializable +import nextflow.lineage.serde.LinSerializable /** * Models a task execution. @@ -27,7 +27,7 @@ import nextflow.data.cid.serde.CidSerializable */ @Canonical @CompileStatic -class TaskRun implements CidSerializable { +class TaskRun implements LinSerializable { /** * Execution session identifier */ diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/Workflow.groovy similarity index 89% rename from modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/Workflow.groovy index fb42cb3bd1..b47781e47a 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/Workflow.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/Workflow.groovy @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -package nextflow.data.cid.model +package nextflow.lineage.model import groovy.transform.Canonical import groovy.transform.CompileStatic -import nextflow.data.cid.serde.CidSerializable +import nextflow.lineage.serde.LinSerializable /** @@ -28,7 +28,7 @@ import nextflow.data.cid.serde.CidSerializable */ @Canonical @CompileStatic -class Workflow implements CidSerializable { +class Workflow implements LinSerializable { /** * List of script files defining a workflow */ diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutputs.groovy similarity index 88% rename from modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutputs.groovy index c78fb6bd5d..e9d727bdd7 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowOutputs.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutputs.groovy @@ -14,13 +14,12 @@ * limitations under the License. 
*/ -package nextflow.data.cid.model +package nextflow.lineage.model import groovy.transform.Canonical import groovy.transform.CompileStatic -import nextflow.data.cid.serde.CidSerializable +import nextflow.lineage.serde.LinSerializable -import java.time.Instant import java.time.OffsetDateTime /** @@ -30,7 +29,7 @@ import java.time.OffsetDateTime */ @Canonical @CompileStatic -class WorkflowOutputs implements CidSerializable { +class WorkflowOutputs implements LinSerializable { /** * Creation date of the workflow outputs description */ diff --git a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowRun.groovy similarity index 91% rename from modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowRun.groovy index 77903d5850..0bfc615863 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/model/WorkflowRun.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowRun.groovy @@ -14,11 +14,11 @@ * limitations under the License. */ -package nextflow.data.cid.model +package nextflow.lineage.model import groovy.transform.Canonical import groovy.transform.CompileStatic -import nextflow.data.cid.serde.CidSerializable +import nextflow.lineage.serde.LinSerializable /** * Models a Workflow Execution @@ -27,7 +27,7 @@ import nextflow.data.cid.serde.CidSerializable */ @Canonical @CompileStatic -class WorkflowRun implements CidSerializable { +class WorkflowRun implements LinSerializable { /** * Description of the workflow associated to the workflow run. 
*/ diff --git a/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy b/modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy similarity index 68% rename from modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy index 6f23cdbc69..7c50d27d52 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/serde/CidEncoder.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy @@ -14,34 +14,34 @@ * limitations under the License. */ -package nextflow.data.cid.serde +package nextflow.lineage.serde import groovy.transform.CompileStatic -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.TaskOutputs -import nextflow.data.cid.model.TaskRun -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutputs -import nextflow.data.cid.model.WorkflowRun +import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.TaskOutputs +import nextflow.lineage.model.TaskRun +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowRun import nextflow.serde.gson.GsonEncoder import nextflow.serde.gson.RuntimeTypeAdapterFactory /** - * Implements a JSON encoder for CID model objects + * Implements a JSON encoder for lineage model objects * * @author Paolo Di Tommaso */ @CompileStatic -class CidEncoder extends GsonEncoder { +class LinEncoder extends GsonEncoder { - CidEncoder() { - withTypeAdapterFactory(newCidTypeAdapterFactory()) + LinEncoder() { + withTypeAdapterFactory(newLidTypeAdapterFactory()) // enable rendering of null values withSerializeNulls(true) } - static RuntimeTypeAdapterFactory newCidTypeAdapterFactory(){ - RuntimeTypeAdapterFactory.of(CidSerializable.class, "type") + static RuntimeTypeAdapterFactory newLidTypeAdapterFactory(){ + RuntimeTypeAdapterFactory.of(LinSerializable.class, "type") .registerSubtype(WorkflowRun, 
WorkflowRun.simpleName) .registerSubtype(WorkflowOutputs, WorkflowOutputs.simpleName) .registerSubtype(Workflow, Workflow.simpleName) diff --git a/modules/nf-cid/src/main/nextflow/data/cid/serde/CidSerializable.groovy b/modules/nf-lineage/src/main/nextflow/lineage/serde/LinSerializable.groovy similarity index 84% rename from modules/nf-cid/src/main/nextflow/data/cid/serde/CidSerializable.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/serde/LinSerializable.groovy index a0eee91cad..fabb0bded9 100644 --- a/modules/nf-cid/src/main/nextflow/data/cid/serde/CidSerializable.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/serde/LinSerializable.groovy @@ -14,16 +14,16 @@ * limitations under the License. */ -package nextflow.data.cid.serde +package nextflow.lineage.serde import groovy.transform.CompileStatic import nextflow.serde.JsonSerializable /** - * Marker interface for CID serializable objects + * Marker interface for lineage serializable objects * * @author Paolo Di Tommaso */ @CompileStatic -interface CidSerializable extends JsonSerializable { +interface LinSerializable extends JsonSerializable { } diff --git a/modules/nf-cid/src/resources/META-INF/extensions.idx b/modules/nf-lineage/src/resources/META-INF/extensions.idx similarity index 83% rename from modules/nf-cid/src/resources/META-INF/extensions.idx rename to modules/nf-lineage/src/resources/META-INF/extensions.idx index e205ab34be..53c350a1be 100644 --- a/modules/nf-cid/src/resources/META-INF/extensions.idx +++ b/modules/nf-lineage/src/resources/META-INF/extensions.idx @@ -14,6 +14,6 @@ # limitations under the License. 
# -nextflow.data.cid.DefaultCidStoreFactory -nextflow.data.cid.CidObserverFactory -nextflow.data.cid.cli.CidCommandImpl +nextflow.lineage.DefaultLinStoreFactory +nextflow.lineage.LinObserverFactory +nextflow.lineage.cli.LinCommandImpl diff --git a/modules/nf-cid/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider b/modules/nf-lineage/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider similarity index 93% rename from modules/nf-cid/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider rename to modules/nf-lineage/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider index d68acbd6d1..4c5e0f213d 100644 --- a/modules/nf-cid/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider +++ b/modules/nf-lineage/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider @@ -14,4 +14,4 @@ # limitations under the License. # -nextflow.data.cid.fs.CidFileSystemProvider +nextflow.lineage.fs.LinFileSystemProvider diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinHistoryLogTest.groovy similarity index 61% rename from modules/nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/DefaultLinHistoryLogTest.groovy index 64e922d18e..bde00b0595 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidHistoryLogTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinHistoryLogTest.groovy @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package nextflow.data.cid +package nextflow.lineage import spock.lang.Specification @@ -21,20 +21,20 @@ import java.nio.file.Files import java.nio.file.Path /** - * CID History file tests + * Lineage History file tests * * @author Jorge Ejarque */ -class DefaultCidHistoryLogTest extends Specification { +class DefaultLinHistoryLogTest extends Specification { Path tempDir Path historyFile - DefaultCidHistoryLog cidHistoryLog + DefaultLinHistoryLog linHistoryLog def setup() { tempDir = Files.createTempDirectory("wdir") - historyFile = tempDir.resolve("cid-history") - cidHistoryLog = new DefaultCidHistoryLog(historyFile) + historyFile = tempDir.resolve("lin-history") + linHistoryLog = new DefaultLinHistoryLog(historyFile) } def cleanup(){ @@ -45,60 +45,59 @@ class DefaultCidHistoryLogTest extends Specification { given: UUID sessionId = UUID.randomUUID() String runName = "TestRun" - String runCid = "cid://123" + String runLid = "lid://123" when: - cidHistoryLog.write(runName, sessionId, runCid) + linHistoryLog.write(runName, sessionId, runLid) then: def files = historyFile.listFiles() files.size() == 1 - def parsedRecord = CidHistoryRecord.parse(files[0].text) + def parsedRecord = LinHistoryRecord.parse(files[0].text) parsedRecord.sessionId == sessionId parsedRecord.runName == runName - parsedRecord.runCid == runCid + parsedRecord.runLid == runLid } def "should return correct record for existing session"() { given: UUID sessionId = UUID.randomUUID() String runName = "Run1" - String runCid = "cid://123" + String runLid = "lid://123" and: - cidHistoryLog.write(runName, sessionId, runCid) + linHistoryLog.write(runName, sessionId, runLid) when: - def record = cidHistoryLog.getRecord(sessionId) + def record = linHistoryLog.getRecord(sessionId) then: record.sessionId == sessionId record.runName == runName - record.runCid == runCid + record.runLid == runLid } def "should return null and warn if session does not exist"() { expect: - 
cidHistoryLog.getRecord(UUID.randomUUID()) == null + linHistoryLog.getRecord(UUID.randomUUID()) == null } - def "update should modify existing Cid for given session"() { + def "update should modify existing Lid for given session"() { given: UUID sessionId = UUID.randomUUID() String runName = "Run1" - String runCidUpdated = "run-cid-updated" - String resultsCidUpdated = "results-cid-updated" + String runLidUpdated = "run-lid-updated" and: - cidHistoryLog.write(runName, sessionId, 'run-cid-initial') + linHistoryLog.write(runName, sessionId, 'run-lid-initial') when: - cidHistoryLog.updateRunCid(sessionId, runCidUpdated) + linHistoryLog.updateRunLid(sessionId, runLidUpdated) then: def files = historyFile.listFiles() files.size() == 1 - def parsedRecord = CidHistoryRecord.parse(files[0].text) - parsedRecord.runCid == runCidUpdated + def parsedRecord = LinHistoryRecord.parse(files[0].text) + parsedRecord.runLid == runLidUpdated } def "update should do nothing if session does not exist"() { @@ -106,34 +105,34 @@ class DefaultCidHistoryLogTest extends Specification { UUID existingSessionId = UUID.randomUUID() UUID nonExistingSessionId = UUID.randomUUID() String runName = "Run1" - String runCid = "cid://123" + String runLid = "lid://123" and: - cidHistoryLog.write(runName, existingSessionId, runCid) + linHistoryLog.write(runName, existingSessionId, runLid) when: - cidHistoryLog.updateRunCid(nonExistingSessionId, "new-cid") + linHistoryLog.updateRunLid(nonExistingSessionId, "new-lid") then: def files = historyFile.listFiles() files.size() == 1 - def parsedRecord = CidHistoryRecord.parse(files[0].text) - parsedRecord.runCid == runCid + def parsedRecord = LinHistoryRecord.parse(files[0].text) + parsedRecord.runLid == runLid } def 'should get records' () { given: UUID sessionId = UUID.randomUUID() String runName = "Run1" - String runCid = "cid://123" + String runLid = "lid://123" and: - cidHistoryLog.write(runName, sessionId, runCid) + linHistoryLog.write(runName, sessionId, 
runLid) when: - def records = cidHistoryLog.getRecords() + def records = linHistoryLog.getRecords() then: records.size() == 1 records[0].sessionId == sessionId records[0].runName == runName - records[0].runCid == runCid + records[0].runLid == runLid } } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreFactoryTest.groovy similarity index 85% rename from modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreFactoryTest.groovy index 1ec482e71c..53d031703a 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreFactoryTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreFactoryTest.groovy @@ -14,9 +14,9 @@ * limitations under the License. */ -package nextflow.data.cid +package nextflow.lineage -import nextflow.data.config.DataConfig +import nextflow.lineage.config.LineageConfig import spock.lang.Specification import spock.lang.Unroll @@ -24,13 +24,13 @@ import spock.lang.Unroll * * @author Paolo Di Tommaso */ -class DefaultCidStoreFactoryTest extends Specification { +class DefaultLinStoreFactoryTest extends Specification { @Unroll def 'should validate can open' () { given: - def factory = new DefaultCidStoreFactory() - def config = new DataConfig(CONFIG) + def factory = new DefaultLinStoreFactory() + def config = new LineageConfig(CONFIG) expect: factory.canOpen(config) == EXPECTED diff --git a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy similarity index 67% rename from modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy index 8b20bf0823..e2443d27ad 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/DefaultCidStoreTest.groovy +++ 
b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy @@ -14,9 +14,9 @@ * limitations under the License. */ -package nextflow.data.cid +package nextflow.lineage -import nextflow.data.cid.model.Annotation +import nextflow.lineage.model.Annotation import java.nio.file.Files import java.nio.file.Path @@ -24,14 +24,14 @@ import java.time.Instant import java.time.OffsetDateTime import java.time.ZoneOffset -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowRun -import nextflow.data.cid.serde.CidEncoder -import nextflow.data.config.DataConfig +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowRun +import nextflow.lineage.serde.LinEncoder +import nextflow.lineage.config.LineageConfig import spock.lang.Specification import spock.lang.TempDir @@ -39,69 +39,69 @@ import spock.lang.TempDir * * @author Jorge Ejarque */ -class DefaultCidStoreTest extends Specification { +class DefaultLinStoreTest extends Specification { @TempDir Path tempDir Path storeLocation Path metaLocation - DataConfig config + LineageConfig config def setup() { storeLocation = tempDir.resolve("store") metaLocation = storeLocation.resolve(".meta") def configMap = [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]] - config = new DataConfig(configMap) + config = new LineageConfig(configMap) } def 'should open store'() { given: - def cidStore = new DefaultCidStore() + def store = new DefaultLinStore() when: - cidStore.open(config) - def historyLog = cidStore.getHistoryLog() + store.open(config) + def historyLog = store.getHistoryLog() then: - 
cidStore.getMetadataPath() == metaLocation + store.getMetadataPath() == metaLocation historyLog != null - historyLog instanceof DefaultCidHistoryLog + historyLog instanceof DefaultLinHistoryLog } def "save should store value in the correct file location"() { given: def key = "testKey" - def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://workflow", "cid://task", 1234) - def cidStore = new DefaultCidStore() - cidStore.open(config) + def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid://task", 1234) + def lidStore = new DefaultLinStore() + lidStore.open(config) when: - cidStore.save(key, value) + lidStore.save(key, value) then: def filePath = metaLocation.resolve("$key/.data.json") Files.exists(filePath) - filePath.text == new CidEncoder().encode(value) + filePath.text == new LinEncoder().encode(value) } def "load should retrieve stored value correctly"() { given: def key = "testKey" - def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://workflow", "cid://task", 1234) - def cidStore = new DefaultCidStore() - cidStore.open(config) - cidStore.save(key, value) + def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid://task", 1234) + def lidStore = new DefaultLinStore() + lidStore.open(config) + lidStore.save(key, value) expect: - cidStore.load(key).toString() == value.toString() + lidStore.load(key).toString() == value.toString() } def "load should return null if key does not exist"() { given: - def cidStore = new DefaultCidStore() - cidStore.open(config) + def lidStore = new DefaultLinStore() + lidStore.open(config) expect: - cidStore.load("nonexistentKey") == null + lidStore.load("nonexistentKey") == null } def 'should query' () { @@ -119,15 +119,15 @@ 
class DefaultCidStoreTest extends Specification { def key4 = "testKey4" def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key4","value4"), new Annotation("key3","value3")]) - def cidStore = new DefaultCidStore() - cidStore.open(config) - cidStore.save(key, value1) - cidStore.save(key2, value2) - cidStore.save(key3, value3) - cidStore.save(key4, value4) + def lidStore = new DefaultLinStore() + lidStore.open(config) + lidStore.save(key, value1) + lidStore.save(key2, value2) + lidStore.save(key3, value3) + lidStore.save(key4, value4) when: - def results = cidStore.search("type=DataOutput&annotations.key=key2&annotations.value=value2") + def results = lidStore.search("type=DataOutput&annotations.key=key2&annotations.value=value2") then: results.size() == 2 results.keySet().containsAll([key2,key3]) diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinHistoryRecordTest.groovy similarity index 66% rename from modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/LinHistoryRecordTest.groovy index 6478069051..c874ed2b1f 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidHistoryRecordTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinHistoryRecordTest.groovy @@ -13,43 +13,43 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package nextflow.data.cid +package nextflow.lineage import spock.lang.Specification /** - * CID History Record tests + * Lineage History Record tests * * @author Jorge Ejarque */ -class CidHistoryRecordTest extends Specification { - def "CidRecord parse should throw for invalid record"() { +class LinHistoryRecordTest extends Specification { + def "LinHistoryRecord parse should throw for invalid record"() { when: - CidHistoryRecord.parse("invalid-record") + LinHistoryRecord.parse("invalid-record") then: thrown(IllegalArgumentException) } - def "CidRecord parse should handle 4-column record"() { + def "LinHistoryRecord parse should handle 4-column record"() { given: def timestamp = new Date() - def formattedTimestamp = CidHistoryRecord.TIMESTAMP_FMT.format(timestamp) - def line = "${formattedTimestamp}\trun-1\t${UUID.randomUUID()}\tcid://123" + def formattedTimestamp = LinHistoryRecord.TIMESTAMP_FMT.format(timestamp) + def line = "${formattedTimestamp}\trun-1\t${UUID.randomUUID()}\tlid://123" when: - def record = CidHistoryRecord.parse(line) + def record = LinHistoryRecord.parse(line) then: record.timestamp != null record.runName == "run-1" - record.runCid == "cid://123" + record.runLid == "lid://123" } - def "CidRecord toString should produce tab-separated format"() { + def "LinHistoryRecord toString should produce tab-separated format"() { given: UUID sessionId = UUID.randomUUID() - def record = new CidHistoryRecord(new Date(), "TestRun", sessionId, "cid://123") + def record = new LinHistoryRecord(new Date(), "TestRun", sessionId, "lid://123") when: def line = record.toString() diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy similarity index 80% rename from modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy index 3c1d459c23..0695f07806 100644 --- 
a/modules/nf-cid/src/test/nextflow/data/cid/CidObserverTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy @@ -15,10 +15,10 @@ * */ -package nextflow.data.cid +package nextflow.lineage -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.TaskOutputs +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.TaskOutputs import nextflow.file.FileHolder import nextflow.processor.TaskHandler import nextflow.script.TokenVar @@ -31,8 +31,9 @@ import nextflow.script.params.StdInParam import nextflow.script.params.StdOutParam import nextflow.script.params.ValueInParam import nextflow.script.params.ValueOutParam +import spock.lang.Shared -import static nextflow.data.cid.fs.CidPath.* +import static nextflow.lineage.fs.LinPath.* import java.nio.file.Files import java.nio.file.Path @@ -40,14 +41,14 @@ import java.nio.file.attribute.BasicFileAttributes import com.google.common.hash.HashCode import nextflow.Session -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutputs -import nextflow.data.cid.model.WorkflowRun -import nextflow.data.cid.serde.CidEncoder -import nextflow.data.config.DataConfig +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowRun +import nextflow.lineage.serde.LinEncoder +import nextflow.lineage.config.LineageConfig import nextflow.processor.TaskConfig import nextflow.processor.TaskId import nextflow.processor.TaskProcessor @@ -59,11 +60,18 @@ import nextflow.util.CacheHelper import nextflow.util.PathNormalizer import spock.lang.Specification import spock.lang.Unroll + /** * * @author Paolo Di Tommaso */ -class CidObserverTest extends 
Specification { +class LinObserverTest extends Specification { + @Shared + Path lidFolder = Files.createTempDirectory("lid") + def cleanupSpec(){ + lidFolder.deleteDir() + } + def 'should normalize paths' (){ given: def folder = Files.createTempDirectory('test') @@ -78,7 +86,7 @@ class CidObserverTest extends Specification { } def params = [path: workDir.resolve("path/file.txt"), sequence: projectDir.resolve("file2.txt").toString(), value: 12] when: - def results = CidObserver.getNormalizedParams(params, new PathNormalizer(metadata)) + def results = LinObserver.getNormalizedParams(params, new PathNormalizer(metadata)) then: results.size() == 3 results.get(0).name == "path" @@ -99,8 +107,8 @@ class CidObserverTest extends Specification { given: def folder = Files.createTempDirectory('test') and: - def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] - def store = new DefaultCidStore(); + def config = [workflow:[lineage:[enabled: true, store:[location:folder.toString()]]]] + def store = new DefaultLinStore(); def uniqueId = UUID.randomUUID() def scriptFile = folder.resolve("main.nf") def module1 = folder.resolve("script1.nf"); module1.text = 'hola' @@ -122,8 +130,8 @@ class CidObserverTest extends Specification { getWorkflowMetadata() >> metadata getParams() >> new ScriptBinding.ParamsMap() } - store.open(DataConfig.create(session)) - def observer = Spy(new CidObserver(session, store)) + store.open(LineageConfig.create(session)) + def observer = Spy(new LinObserver(session, store)) when: def files = observer.collectScriptDataPaths(new PathNormalizer(metadata)) @@ -149,8 +157,8 @@ class CidObserverTest extends Specification { def 'should save workflow' (){ given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] - def store = new DefaultCidStore(); + def config = [workflow:[lineage:[enabled: true, store:[location:folder.toString()]]]] + def store = new 
DefaultLinStore(); def uniqueId = UUID.randomUUID() def scriptFile = folder.resolve("main.nf") def metadata = Mock(WorkflowMetadata){ @@ -168,8 +176,8 @@ class CidObserverTest extends Specification { getWorkflowMetadata() >> metadata getParams() >> new ScriptBinding.ParamsMap() } - store.open(DataConfig.create(session)) - def observer = new CidObserver(session, store) + store.open(LineageConfig.create(session)) + def observer = new LinObserver(session, store) def mainScript = new DataPath("file://${scriptFile.toString()}", new Checksum("78910", "nextflow", "standard")) def workflow = new Workflow([mainScript],"https://nextflow.io/nf-test/", "123456" ) def workflowRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [], config) @@ -177,7 +185,7 @@ class CidObserverTest extends Specification { observer.onFlowCreate(session) observer.onFlowBegin() then: - folder.resolve(".meta/${observer.executionHash}/.data.json").text == new CidEncoder().encode(workflowRun) + folder.resolve(".meta/${observer.executionHash}/.data.json").text == new LinEncoder().encode(workflowRun) cleanup: folder?.deleteDir() @@ -185,7 +193,7 @@ class CidObserverTest extends Specification { def 'should get parameter type' () { expect: - CidObserver.getParameterType(PARAM) == STRING + LinObserver.getParameterType(PARAM) == STRING where: PARAM | STRING new FileInParam(null, []) | "path" @@ -201,7 +209,7 @@ class CidObserverTest extends Specification { def 'should save task run' () { given: def folder = Files.createTempDirectory('test').toRealPath() - def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] + def config = [workflow:[lineage:[enabled: true, store:[location:folder.toString()]]]] def uniqueId = UUID.randomUUID() def workDir = folder.resolve("work") def session = Mock(Session) { @@ -218,10 +226,10 @@ class CidObserverTest extends Specification { getWorkDir() >> workDir } and: - def store = new DefaultCidStore(); - store.open(DataConfig.create(session)) 
+ def store = new DefaultLinStore(); + store.open(LineageConfig.create(session)) and: - def observer = new CidObserver(session, store) + def observer = new LinObserver(session, store) def normalizer = new PathNormalizer(metadata) observer.executionHash = "hash" observer.normalizer = normalizer @@ -279,21 +287,21 @@ class CidObserverTest extends Specification { getTask() >> task } - and: 'Expected CID objects' + and: 'Expected LID objects' def sourceHash = CacheHelper.hasher('echo task source').hash().toString() def scriptHash = CacheHelper.hasher('this is the script').hash().toString() - def taskDescription = new nextflow.data.cid.model.TaskRun(uniqueId.toString(), "foo", + def taskDescription = new nextflow.lineage.model.TaskRun(uniqueId.toString(), "foo", new Checksum(sourceHash, "nextflow", "standard"), new Checksum(scriptHash, "nextflow", "standard"), [ - new Parameter("path", "file1", ['cid://78567890/file1.txt']), + new Parameter("path", "file1", ['lid://78567890/file1.txt']), new Parameter("path", "file2", [[path: normalizer.normalizePath(file), checksum: [value:fileHash, algorithm: "nextflow", mode: "standard"]]]), new Parameter("val", "id", "value") - ], null, null, null, null, [:], [], "cid://hash", null) + ], null, null, null, null, [:], [], "lid://hash", null) def dataOutput1 = new DataOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), - "cid://1234567890", "cid://hash", "cid://1234567890", attrs1.size(), CidUtils.toDate(attrs1.creationTime()), CidUtils.toDate(attrs1.lastModifiedTime()) ) + "lid://1234567890", "lid://hash", "lid://1234567890", attrs1.size(), LinUtils.toDate(attrs1.creationTime()), LinUtils.toDate(attrs1.lastModifiedTime()) ) def dataOutput2 = new DataOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), - "cid://1234567890", "cid://hash", "cid://1234567890", attrs2.size(), CidUtils.toDate(attrs2.creationTime()), CidUtils.toDate(attrs2.lastModifiedTime()) ) + "lid://1234567890", 
"lid://hash", "lid://1234567890", attrs2.size(), LinUtils.toDate(attrs2.creationTime()), LinUtils.toDate(attrs2.lastModifiedTime()) ) when: observer.onProcessComplete(handler, null ) @@ -305,15 +313,15 @@ class CidObserverTest extends Specification { taskRunResult == taskDescription dataOutputResult1 == dataOutput1 dataOutputResult2 == dataOutput2 - taskOutputsResult.taskRun == "cid://1234567890" - taskOutputsResult.workflowRun == "cid://hash" + taskOutputsResult.taskRun == "lid://1234567890" + taskOutputsResult.workflowRun == "lid://hash" taskOutputsResult.outputs.size() == 3 taskOutputsResult.outputs.get(0).type == "path" taskOutputsResult.outputs.get(0).name == "file1" - taskOutputsResult.outputs.get(0).value == "cid://1234567890/fileOut1.txt" + taskOutputsResult.outputs.get(0).value == "lid://1234567890/fileOut1.txt" taskOutputsResult.outputs.get(1).type == "path" taskOutputsResult.outputs.get(1).name == "file2" - taskOutputsResult.outputs.get(1).value == ["cid://1234567890/fileOut2.txt"] + taskOutputsResult.outputs.get(1).value == ["lid://1234567890/fileOut2.txt"] taskOutputsResult.outputs.get(2).type == "val" taskOutputsResult.outputs.get(2).name == "id" taskOutputsResult.outputs.get(2).value == "value" @@ -325,13 +333,13 @@ class CidObserverTest extends Specification { def 'should save task data output' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[enabled: true, store:[location:folder.toString()]]]] - def store = new DefaultCidStore(); + def config = [workflow:[lineage:[enabled: true, store:[location:folder.toString()]]]] + def store = new DefaultLinStore(); def session = Mock(Session) { getConfig()>>config } - store.open(DataConfig.create(session)) - def observer = Spy(new CidObserver(session, store)) + store.open(LineageConfig.create(session)) + def observer = Spy(new LinObserver(session, store)) observer.executionHash = "hash" and: def workDir = folder.resolve('12/34567890') @@ -353,14 +361,14 @@ class 
CidObserverTest extends Specification { and: def attrs = Files.readAttributes(outFile, BasicFileAttributes) def output = new DataOutput(outFile.toString(), new Checksum(fileHash, "nextflow", "standard"), - "cid://15cd5b07", "cid://hash", "cid://15cd5b07", attrs.size(), CidUtils.toDate(attrs.creationTime()), CidUtils.toDate(attrs.lastModifiedTime()) ) + "lid://15cd5b07", "lid://hash", "lid://15cd5b07", attrs.size(), LinUtils.toDate(attrs.creationTime()), LinUtils.toDate(attrs.lastModifiedTime()) ) and: observer.readAttributes(outFile) >> attrs when: observer.storeTaskOutput(task, outFile) then: - folder.resolve(".meta/${hash}/foo/bar/file.bam/.data.json").text == new CidEncoder().encode(output) + folder.resolve(".meta/${hash}/foo/bar/file.bam/.data.json").text == new LinEncoder().encode(output) cleanup: folder?.deleteDir() @@ -368,8 +376,8 @@ class CidObserverTest extends Specification { def 'should relativise task output dirs' (){ when: - def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] - def store = new DefaultCidStore(); + def config = [workflow:[lineage:[enabled: true, store:[location:lidFolder.toString()]]]] + def store = new DefaultLinStore(); def session = Mock(Session) { getConfig()>>config } @@ -384,8 +392,8 @@ class CidObserverTest extends Specification { getWorkDir() >> WORK_DIR getConfig() >> taskConfig } - store.open(DataConfig.create(session)) - def observer = new CidObserver(session, store) + store.open(LineageConfig.create(session)) + def observer = new LinObserver(session, store) then: observer.getTaskRelative(task, PATH) == EXPECTED where: @@ -401,8 +409,8 @@ class CidObserverTest extends Specification { @Unroll def 'should return exception when relativize task output dirs'() { when: - def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] - def store = new DefaultCidStore(); + def config = [workflow:[lineage:[enabled: true, store:[location:lidFolder.toString()]]]] + def store = new DefaultLinStore(); def 
session = Mock(Session) { getConfig()>>config } @@ -417,8 +425,8 @@ class CidObserverTest extends Specification { getWorkDir() >> WORK_DIR getConfig() >> taskConfig } - store.open(DataConfig.create(session)) - def observer = new CidObserver(session, store) + store.open(LineageConfig.create(session)) + def observer = new LinObserver(session, store) observer.getTaskRelative(task, PATH) then: def e = thrown(IllegalArgumentException) @@ -432,14 +440,14 @@ class CidObserverTest extends Specification { def 'should relativize workflow output dirs' (){ when: - def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] - def store = new DefaultCidStore(); + def config = [workflow:[lineage:[enabled: true, store:[location:lidFolder.toString()]]]] + def store = new DefaultLinStore(); def session = Mock(Session) { getOutputDir()>>OUTPUT_DIR getConfig()>>config } - store.open(DataConfig.create(session)) - def observer = new CidObserver(session, store) + store.open(LineageConfig.create(session)) + def observer = new LinObserver(session, store) then: observer.getWorkflowRelative(PATH) == EXPECTED where: @@ -453,13 +461,13 @@ class CidObserverTest extends Specification { @Unroll def 'should return exception when relativize workflow output dirs' (){ when: - def config = [workflow:[data:[enabled: true, store:[location:'cid']]]] - def store = new DefaultCidStore(); + def config = [workflow:[lineage:[enabled: true, store:[location:lidFolder.toString()]]]] + def store = new DefaultLinStore(); def session = Mock(Session) { getOutputDir()>>OUTPUT_DIR getConfig()>>config } - def observer = new CidObserver(session, store) + def observer = new LinObserver(session, store) observer.getWorkflowRelative(PATH) then: def e = thrown(IllegalArgumentException) @@ -473,8 +481,8 @@ class CidObserverTest extends Specification { def 'should save workflow output'() { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[enabled: true, 
store:[location:folder.toString()]]]] - def store = new DefaultCidStore(); + def config = [workflow:[lineage:[enabled: true, store:[location:folder.toString()]]]] + def store = new DefaultLinStore(); def outputDir = folder.resolve('results') def uniqueId = UUID.randomUUID() def scriptFile = folder.resolve("main.nf") @@ -496,16 +504,16 @@ class CidObserverTest extends Specification { getRunName()>>"test_run" getParams() >> new ScriptBinding.ParamsMap() } - store.open(DataConfig.create(session)) - def observer = new CidObserver(session, store) - def encoder = new CidEncoder() + store.open(LineageConfig.create(session)) + def observer = new LinObserver(session, store) + def encoder = new LinEncoder() when: 'Starting workflow' observer.onFlowCreate(session) observer.onFlowBegin() then: 'History file should contain execution hash' - def cid = store.getHistoryLog().getRecord(uniqueId).runCid.substring(CID_PROT.size()) - cid == observer.executionHash + def lid = store.getHistoryLog().getRecord(uniqueId).runLid.substring(LID_PROT.size()) + lid == observer.executionHash when: ' publish output with source file' def outFile1 = outputDir.resolve('foo/file.bam') @@ -517,12 +525,12 @@ class CidObserverTest extends Specification { observer.onFilePublish(outFile1, sourceFile1) observer.onWorkflowPublish("a", outFile1) - then: 'check file 1 output metadata in cid store' + then: 'check file 1 output metadata in lid store' def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes) def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() def output1 = new DataOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), - "cid://123987/file.bam", "$CID_PROT${observer.executionHash}", null, - attrs1.size(), CidUtils.toDate(attrs1.creationTime()), CidUtils.toDate(attrs1.lastModifiedTime()) ) + "lid://123987/file.bam", "$LID_PROT${observer.executionHash}", null, + attrs1.size(), LinUtils.toDate(attrs1.creationTime()), LinUtils.toDate(attrs1.lastModifiedTime()) 
) folder.resolve(".meta/${observer.executionHash}/foo/file.bam/.data.json").text == encoder.encode(output1) when: 'publish without source path' @@ -533,18 +541,18 @@ class CidObserverTest extends Specification { def fileHash2 = CacheHelper.hasher(outFile2).hash().toString() observer.onFilePublish(outFile2) observer.onWorkflowPublish("b", outFile2) - then: 'Check outFile2 metadata in cid store' + then: 'Check outFile2 metadata in lid store' def output2 = new DataOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), - "cid://${observer.executionHash}" , "cid://${observer.executionHash}", null, - attrs2.size(), CidUtils.toDate(attrs2.creationTime()), CidUtils.toDate(attrs2.lastModifiedTime()) ) + "lid://${observer.executionHash}" , "lid://${observer.executionHash}", null, + attrs2.size(), LinUtils.toDate(attrs2.creationTime()), LinUtils.toDate(attrs2.lastModifiedTime()) ) folder.resolve(".meta/${observer.executionHash}/foo/file2.bam/.data.json").text == encoder.encode(output2) when: 'Workflow complete' observer.onFlowComplete() - then: 'Check history file is updated and Workflow Result is written in the cid store' - def finalCid = store.getHistoryLog().getRecord(uniqueId).runCid.substring(CID_PROT.size()) - def resultsRetrieved = store.load("${finalCid}#outputs") as WorkflowOutputs - resultsRetrieved.outputs == [new Parameter(Path.simpleName, "a", "cid://${observer.executionHash}/foo/file.bam"), new Parameter(Path.simpleName, "b", "cid://${observer.executionHash}/foo/file2.bam")] + then: 'Check history file is updated and Workflow Result is written in the lid store' + def finalLid = store.getHistoryLog().getRecord(uniqueId).runLid.substring(LID_PROT.size()) + def resultsRetrieved = store.load("${finalLid}#outputs") as WorkflowOutputs + resultsRetrieved.outputs == [new Parameter(Path.simpleName, "a", "lid://${observer.executionHash}/foo/file.bam"), new Parameter(Path.simpleName, "b", "lid://${observer.executionHash}/foo/file2.bam")] cleanup: 
folder?.deleteDir() diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy similarity index 80% rename from modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy index 6b4fe5930f..98038aadf8 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidPropertyValidationTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy @@ -13,26 +13,26 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package nextflow.data.cid +package nextflow.lineage import spock.lang.Specification /** * @author Jorge Ejarque */ -class CidPropertyValidationTest extends Specification{ +class LinPropertyValidationTest extends Specification{ def 'should throw exception when property does not exist'(){ when: - new CidPropertyValidator().validate(['value','not_existing']) + new LinPropertyValidator().validate(['value', 'not_existing']) then: def e = thrown(IllegalArgumentException) - e.message.startsWith( "Property 'not_existing' doesn't exist in the CID model") + e.message.startsWith( "Property 'not_existing' doesn't exist in the lineage model") } def 'should not throw exception when property exist'(){ when: - new CidPropertyValidator().validate(['value', 'outputs']) + new LinPropertyValidator().validate(['value', 'outputs']) then: noExceptionThrown() } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy similarity index 77% rename from modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy index 1002a6835c..895db4d1ea 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/CidUtilsTest.groovy +++ 
b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy @@ -14,15 +14,15 @@ * limitations under the License. */ -package nextflow.data.cid - -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutputs -import nextflow.data.cid.model.WorkflowRun -import nextflow.data.config.DataConfig +package nextflow.lineage + +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowRun +import nextflow.lineage.config.LineageConfig import spock.lang.Specification import spock.lang.TempDir @@ -32,23 +32,23 @@ import java.time.Instant import java.time.OffsetDateTime import java.time.ZoneOffset -class CidUtilsTest extends Specification{ +class LinUtilsTest extends Specification{ @TempDir Path tempDir Path storeLocation - DataConfig config + LineageConfig config def setup() { storeLocation = tempDir.resolve("store") def configMap = [enabled: true, store: [location: storeLocation.toString()]] - config = new DataConfig(configMap) + config = new LineageConfig(configMap) } def 'should convert to Date'(){ expect: - CidUtils.toDate(FILE_TIME) == DATE + LinUtils.toDate(FILE_TIME) == DATE where: FILE_TIME | DATE null | null @@ -57,7 +57,7 @@ class CidUtilsTest extends Specification{ def 'should convert to FileTime'(){ expect: - CidUtils.toFileTime(DATE) == FILE_TIME + LinUtils.toFileTime(DATE) == FILE_TIME where: FILE_TIME | DATE null | null @@ -72,21 +72,21 @@ class CidUtilsTest extends Specification{ def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", 
"param2", "value2")]) - def outputs1 = new WorkflowOutputs(OffsetDateTime.now(), "cid://testKey", [new Parameter( "String", "output", "name")] ) - def cidStore = new DefaultCidStore() - cidStore.open(config) - cidStore.save(key, value1) - cidStore.save("$key#outputs", outputs1) + def outputs1 = new WorkflowOutputs(OffsetDateTime.now(), "lid://testKey", [new Parameter( "String", "output", "name")] ) + def lidStore = new DefaultLinStore() + lidStore.open(config) + lidStore.save(key, value1) + lidStore.save("$key#outputs", outputs1) when: - List params = CidUtils.query(cidStore, new URI('cid://testKey#params')) + List params = LinUtils.query(lidStore, new URI('lid://testKey#params')) then: params.size() == 1 params[0] instanceof List (params[0] as List).size() == 2 when: - List outputs = CidUtils.query(cidStore, new URI('cid://testKey#outputs')) + List outputs = LinUtils.query(lidStore, new URI('lid://testKey#outputs')) then: outputs.size() == 1 outputs[0] instanceof List @@ -94,24 +94,24 @@ class CidUtilsTest extends Specification{ param.name == "output" when: - CidUtils.query(cidStore, new URI('cid://testKey#no-exist')) + LinUtils.query(lidStore, new URI('lid://testKey#no-exist')) then: thrown(IllegalArgumentException) when: - CidUtils.query(cidStore, new URI('cid://testKey#outputs.no-exist')) + LinUtils.query(lidStore, new URI('lid://testKey#outputs.no-exist')) then: thrown(IllegalArgumentException) when: - CidUtils.query(cidStore, new URI('cid://no-exist#something')) + LinUtils.query(lidStore, new URI('lid://no-exist#something')) then: thrown(IllegalArgumentException) } def "should parse children elements form Fragment string"() { expect: - CidUtils.parseChildrenFormFragment(FRAGMENT) == EXPECTED as String[] + LinUtils.parseChildrenFormFragment(FRAGMENT) == EXPECTED as String[] where: FRAGMENT | EXPECTED @@ -123,7 +123,7 @@ class CidUtilsTest extends Specification{ def "should parse a query string as Map"() { expect: - CidUtils.parseQuery(QUERY_STRING) == EXPECTED 
+ LinUtils.parseQuery(QUERY_STRING) == EXPECTED where: QUERY_STRING | EXPECTED @@ -138,7 +138,7 @@ class CidUtilsTest extends Specification{ def obj = [ "type": "value", "workflow": ["repository": "subvalue"], "outputs" : [ ["path":"/to/file"],["path":"file2"] ] ] expect: - CidUtils.checkParams(obj, PARAMS) == EXPECTED + LinUtils.checkParams(obj, PARAMS) == EXPECTED where: PARAMS | EXPECTED @@ -154,7 +154,7 @@ class CidUtilsTest extends Specification{ def 'should parse query' (){ expect: - CidUtils.parseQuery(PARAMS) == EXPECTED + LinUtils.parseQuery(PARAMS) == EXPECTED where: PARAMS | EXPECTED "type=value" | ["type": "value"] @@ -173,7 +173,7 @@ class CidUtilsTest extends Specification{ ] expect: - CidUtils.navigate(obj, PATH) == EXPECTED + LinUtils.navigate(obj, PATH) == EXPECTED where: PATH | EXPECTED @@ -187,7 +187,7 @@ class CidUtilsTest extends Specification{ def results = [] when: - CidUtils.treatObject(OBJECT, PARAMS, results) + LinUtils.treatObject(OBJECT, PARAMS, results) then: results == EXPECTED @@ -210,11 +210,11 @@ class CidUtilsTest extends Specification{ def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) - def cidStore = new DefaultCidStore() - cidStore.open(config) - cidStore.save(key, value1) + def lidStore = new DefaultLinStore() + lidStore.open(config) + lidStore.save(key, value1) when: - def result = CidUtils.searchPath(cidStore, key, ["name":"param1"], ["params"] as String[]) + def result = LinUtils.searchPath(lidStore, key, ["name":"param1"], ["params"] as String[]) then: result == [new Parameter("String", "param1", "value1")] @@ -227,13 +227,13 @@ class CidUtilsTest extends Specification{ def wfRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", [key: "value1"]), new 
Parameter("String", "param2", "value2")]) expect: - CidUtils.navigate(wfRun, "workflow.commitId") == "123456" - CidUtils.navigate(wfRun, "params.name") == ["param1", "param2"] - CidUtils.navigate(wfRun, "params.value.key") == "value1" - CidUtils.navigate(wfRun, "params.value.no-exist") == null - CidUtils.navigate(wfRun, "params.no-exist") == null - CidUtils.navigate(wfRun, "no-exist") == null - CidUtils.navigate(null, "something") == null + LinUtils.navigate(wfRun, "workflow.commitId") == "123456" + LinUtils.navigate(wfRun, "params.name") == ["param1", "param2"] + LinUtils.navigate(wfRun, "params.value.key") == "value1" + LinUtils.navigate(wfRun, "params.value.no-exist") == null + LinUtils.navigate(wfRun, "params.no-exist") == null + LinUtils.navigate(wfRun, "no-exist") == null + LinUtils.navigate(null, "something") == null } } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy similarity index 62% rename from modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy index 4a82216c57..d36027f742 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/cli/CidCommandImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy @@ -14,22 +14,22 @@ * limitations under the License. 
*/ -package nextflow.data.cid.cli +package nextflow.lineage.cli import nextflow.SysEnv import nextflow.config.ConfigMap import nextflow.dag.MermaidHtmlRenderer -import nextflow.data.cid.CidHistoryRecord -import nextflow.data.cid.CidStoreFactory -import nextflow.data.cid.DefaultCidHistoryLog -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.TaskRun -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowRun -import nextflow.data.cid.serde.CidEncoder +import nextflow.lineage.LinHistoryRecord +import nextflow.lineage.LinStoreFactory +import nextflow.lineage.DefaultLinHistoryLog +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.TaskRun +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowRun +import nextflow.lineage.serde.LinEncoder import nextflow.plugin.Plugins import org.junit.Rule import spock.lang.Specification @@ -41,7 +41,7 @@ import java.time.Instant import java.time.OffsetDateTime import java.time.ZoneOffset -class CidCommandImplTest extends Specification{ +class LinCommandImplTest extends Specification{ @TempDir Path tmpDir @@ -53,17 +53,17 @@ class CidCommandImplTest extends Specification{ // clear the environment to avoid the local env pollute the test env SysEnv.push([:]) storeLocation = tmpDir.resolve("store") - configMap = new ConfigMap([workflow:[ data: [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]]]]) + configMap = new ConfigMap([workflow: [lineage: [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]]]]) } def cleanup() { Plugins.stop() - CidStoreFactory.reset() + 
LinStoreFactory.reset() SysEnv.pop() } def setupSpec() { - CidStoreFactory.reset() + LinStoreFactory.reset() } /* * Read more http://mrhaki.blogspot.com.es/2015/02/spocklight-capture-and-assert-system.html @@ -71,16 +71,16 @@ class CidCommandImplTest extends Specification{ @Rule OutputCapture capture = new OutputCapture() - def 'should print executions cids' (){ + def 'should print executions lids' (){ given: def historyFile = storeLocation.resolve(".meta/.history") - def cidLog = new DefaultCidHistoryLog(historyFile) + def lidLog = new DefaultLinHistoryLog(historyFile) def uniqueId = UUID.randomUUID() def date = new Date(); - def recordEntry = "${CidHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tcid://123456".toString() - cidLog.write("run_name", uniqueId, "cid://123456", date) + def recordEntry = "${LinHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tlid://123456".toString() + lidLog.write("run_name", uniqueId, "lid://123456", date) when: - new CidCommandImpl().log(configMap) + new LinCommandImpl().log(configMap) def stdout = capture .toString() .readLines()// remove the log part @@ -99,7 +99,7 @@ class CidCommandImplTest extends Specification{ Files.createDirectories(historyFile.parent) when: - new CidCommandImpl().log(configMap) + new LinCommandImpl().log(configMap) def stdout = capture .toString() .readLines()// remove the log part @@ -110,23 +110,23 @@ class CidCommandImplTest extends Specification{ then: stdout.size() == 1 - stdout[0] == "No workflow runs CIDs found." + stdout[0] == "No workflow runs LIDs found." 
} - def 'should show cid content' (){ + def 'should show lid content' (){ given: - def cidFile = storeLocation.resolve(".meta/12345/.data.json") - Files.createDirectories(cidFile.parent) + def lidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) - def encoder = new CidEncoder().withPrettyPrint(true) + def encoder = new LinEncoder().withPrettyPrint(true) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam","cid://123987/", null, 1234, time, time, null) + "lid://123987/file.bam","lid://123987/", null, 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer - cidFile.text = jsonSer + lidFile.text = jsonSer when: - new CidCommandImpl().show(configMap, ["cid://12345"]) + new LinCommandImpl().describe(configMap, ["lid://12345"]) def stdout = capture .toString() .readLines()// remove the log part @@ -139,11 +139,11 @@ class CidCommandImplTest extends Specification{ stdout.join('\n') == expectedOutput } - def 'should warn if no cid content' (){ + def 'should warn if no lid content' (){ given: when: - new CidCommandImpl().show(configMap, ["cid://12345"]) + new LinCommandImpl().describe(configMap, ["lid://12345"]) def stdout = capture .toString() .readLines()// remove the log part @@ -153,70 +153,70 @@ class CidCommandImplTest extends Specification{ then: stdout.size() == 1 - stdout[0] == "Error loading cid://12345. Cid object 12345 not found." + stdout[0] == "Error loading lid://12345. Lineage object 12345 not found." 
} - def 'should get lineage cid content' (){ + def 'should get lineage lid content' (){ given: def outputHtml = tmpDir.resolve('lineage.html') - def cidFile = storeLocation.resolve(".meta/12345/file.bam/.data.json") - def cidFile2 = storeLocation.resolve(".meta/123987/file.bam/.data.json") - def cidFile3 = storeLocation.resolve(".meta/123987/.data.json") - def cidFile4 = storeLocation.resolve(".meta/45678/output.txt/.data.json") - def cidFile5 = storeLocation.resolve(".meta/45678/.data.json") - Files.createDirectories(cidFile.parent) - Files.createDirectories(cidFile2.parent) - Files.createDirectories(cidFile3.parent) - Files.createDirectories(cidFile4.parent) - Files.createDirectories(cidFile5.parent) - def encoder = new CidEncoder() + def lidFile = storeLocation.resolve(".meta/12345/file.bam/.data.json") + def lidFile2 = storeLocation.resolve(".meta/123987/file.bam/.data.json") + def lidFile3 = storeLocation.resolve(".meta/123987/.data.json") + def lidFile4 = storeLocation.resolve(".meta/45678/output.txt/.data.json") + def lidFile5 = storeLocation.resolve(".meta/45678/.data.json") + Files.createDirectories(lidFile.parent) + Files.createDirectories(lidFile2.parent) + Files.createDirectories(lidFile3.parent) + Files.createDirectories(lidFile4.parent) + Files.createDirectories(lidFile5.parent) + def encoder = new LinEncoder() def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://45678", null, 1234, time, time, null) - cidFile.text = encoder.encode(entry) + "lid://123987/file.bam", "lid://45678", null, 1234, time, time, null) + lidFile.text = encoder.encode(entry) entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987", "cid://45678", "cid://123987", 1234, time, time, null) - cidFile2.text = encoder.encode(entry) + "lid://123987", "lid://45678", "lid://123987", 1234, 
time, time, null) + lidFile2.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "foo", new Checksum("abcde2345","nextflow","standard"), new Checksum("abfsc2375","nextflow","standard"), [new Parameter( "ValueInParam", "sample_id","ggal_gut"), - new Parameter("FileInParam","reads",["cid://45678/output.txt"]), + new Parameter("FileInParam","reads",["lid://45678/output.txt"]), new Parameter("FileInParam","input",[new DataPath("path/to/file",new Checksum("45372qe","nextflow","standard"))]) ], null, null, null, null, [:],[], null) - cidFile3.text = encoder.encode(entry) + lidFile3.text = encoder.encode(entry) entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://45678", "cid://45678", null, 1234, time, time, null) - cidFile4.text = encoder.encode(entry) + "lid://45678", "lid://45678", null, 1234, time, time, null) + lidFile4.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "bar", new Checksum("abfs2556","nextflow","standard"), new Checksum("abfsc2375","nextflow","standard"), null,null, null, null, null, [:],[], null) - cidFile5.text = encoder.encode(entry) + lidFile5.text = encoder.encode(entry) final network = """flowchart BT - cid://12345/file.bam@{shape: document, label: "cid://12345/file.bam"} - cid://123987/file.bam@{shape: document, label: "cid://123987/file.bam"} - cid://123987@{shape: process, label: "foo"} + lid://12345/file.bam@{shape: document, label: "lid://12345/file.bam"} + lid://123987/file.bam@{shape: document, label: "lid://123987/file.bam"} + lid://123987@{shape: process, label: "foo"} ggal_gut@{shape: document, label: "ggal_gut"} path/to/file@{shape: document, label: "path/to/file"} - cid://45678/output.txt@{shape: document, label: "cid://45678/output.txt"} - cid://45678@{shape: process, label: "bar"} - - cid://123987/file.bam -->cid://12345/file.bam - cid://123987 -->cid://123987/file.bam - ggal_gut -->cid://123987 - cid://45678/output.txt -->cid://123987 - path/to/file 
-->cid://123987 - cid://45678 -->cid://45678/output.txt + lid://45678/output.txt@{shape: document, label: "lid://45678/output.txt"} + lid://45678@{shape: process, label: "bar"} + + lid://123987/file.bam -->lid://12345/file.bam + lid://123987 -->lid://123987/file.bam + ggal_gut -->lid://123987 + lid://45678/output.txt -->lid://123987 + path/to/file -->lid://123987 + lid://45678 -->lid://45678/output.txt """ final template = MermaidHtmlRenderer.readTemplate() def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) when: - new CidCommandImpl().lineage(configMap, ["cid://12345/file.bam", outputHtml.toString()]) + new LinCommandImpl().render(configMap, ["lid://12345/file.bam", outputHtml.toString()]) def stdout = capture .toString() .readLines()// remove the log part @@ -226,45 +226,45 @@ class CidCommandImplTest extends Specification{ then: stdout.size() == 1 - stdout[0] == "Linage graph for cid://12345/file.bam rendered in ${outputHtml}" + stdout[0] == "Linage graph for lid://12345/file.bam rendered in ${outputHtml}" outputHtml.exists() outputHtml.text == expectedOutput } - def 'should get lineage from workflow cid content' (){ + def 'should get lineage from workflow lid content' (){ given: def outputHtml = tmpDir.resolve('lineage.html') - def cidFile = storeLocation.resolve(".meta/12345/file.bam/.data.json") - def cidFile3 = storeLocation.resolve(".meta/12345/.data.json") - Files.createDirectories(cidFile.parent) - Files.createDirectories(cidFile3.parent) - def encoder = new CidEncoder() + def lidFile = storeLocation.resolve(".meta/12345/file.bam/.data.json") + def lidFile3 = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + Files.createDirectories(lidFile3.parent) + def encoder = new LinEncoder() def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://12345", "cid://12345", null, 
1234, time, time, null) - cidFile.text = encoder.encode(entry) + "lid://12345", "lid://12345", null, 1234, time, time, null) + lidFile.text = encoder.encode(entry) def wf = new Workflow([new DataPath("/path/to/main.nf)")], "hello-nf", "aasdklk") entry = new WorkflowRun(wf,"sessionId","run_name", [new Parameter( "String", "sample_id","ggal_gut"), new Parameter("Integer","reads",2)]) - cidFile3.text = encoder.encode(entry) + lidFile3.text = encoder.encode(entry) final network = """flowchart BT - cid://12345/file.bam@{shape: document, label: "cid://12345/file.bam"} - cid://12345@{shape: processes, label: "run_name"} + lid://12345/file.bam@{shape: document, label: "lid://12345/file.bam"} + lid://12345@{shape: processes, label: "run_name"} ggal_gut@{shape: document, label: "ggal_gut"} 2.0@{shape: document, label: "2.0"} - cid://12345 -->cid://12345/file.bam - ggal_gut -->cid://12345 - 2.0 -->cid://12345 + lid://12345 -->lid://12345/file.bam + ggal_gut -->lid://12345 + 2.0 -->lid://12345 """ final template = MermaidHtmlRenderer.readTemplate() def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) when: - new CidCommandImpl().lineage(configMap, ["cid://12345/file.bam", outputHtml.toString()]) + new LinCommandImpl().render(configMap, ["lid://12345/file.bam", outputHtml.toString()]) def stdout = capture .toString() .readLines()// remove the log part @@ -274,24 +274,24 @@ class CidCommandImplTest extends Specification{ then: stdout.size() == 1 - stdout[0] == "Linage graph for cid://12345/file.bam rendered in ${outputHtml}" + stdout[0] == "Linage graph for lid://12345/file.bam rendered in ${outputHtml}" outputHtml.exists() outputHtml.text == expectedOutput } def 'should show query results'(){ given: - def cidFile = storeLocation.resolve(".meta/12345/.data.json") - Files.createDirectories(cidFile.parent) - def encoder = new CidEncoder().withPrettyPrint(true) + def lidFile = storeLocation.resolve(".meta/12345/.data.json") + 
Files.createDirectories(lidFile.parent) + def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer - cidFile.text = jsonSer + lidFile.text = jsonSer when: - new CidCommandImpl().show(configMap, ["cid:///?type=DataOutput"]) + new LinCommandImpl().describe(configMap, ["lid:///?type=DataOutput"]) def stdout = capture .toString() .readLines()// remove the log part @@ -306,22 +306,22 @@ class CidCommandImplTest extends Specification{ def 'should show query with fragment'(){ given: - def cidFile = storeLocation.resolve(".meta/12345/.data.json") - Files.createDirectories(cidFile.parent) - def cidFile2 = storeLocation.resolve(".meta/67890/.data.json") - Files.createDirectories(cidFile2.parent) - def encoder = new CidEncoder().withPrettyPrint(true) + def lidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def lidFile2 = storeLocation.resolve(".meta/67890/.data.json") + Files.createDirectories(lidFile2.parent) + def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), - "cid://123987/file2.bam", "cid://123987/", null, 1235, time, time, null) + "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) def expectedOutput1 = '[\n 
"path/to/file",\n "path/to/file2"\n]' def expectedOutput2 = '[\n "path/to/file2",\n "path/to/file"\n]' - cidFile.text = encoder.encode(entry) - cidFile2.text = encoder.encode(entry2) + lidFile.text = encoder.encode(entry) + lidFile2.text = encoder.encode(entry2) when: - new CidCommandImpl().show(configMap, ["cid:///?type=DataOutput#path"]) + new LinCommandImpl().describe(configMap, ["lid:///?type=DataOutput#path"]) def stdout = capture .toString() .readLines()// remove the log part @@ -335,18 +335,18 @@ class CidCommandImplTest extends Specification{ def 'should diff'(){ given: - def cidFile = storeLocation.resolve(".meta/12345/.data.json") - Files.createDirectories(cidFile.parent) - def cidFile2 = storeLocation.resolve(".meta/67890/.data.json") - Files.createDirectories(cidFile2.parent) - def encoder = new CidEncoder().withPrettyPrint(true) + def lidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def lidFile2 = storeLocation.resolve(".meta/67890/.data.json") + Files.createDirectories(lidFile2.parent) + def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), - "cid://123987/file2.bam", "cid://123987/", null, 1235, time, time, null) - cidFile.text = encoder.encode(entry) - cidFile2.text = encoder.encode(entry2) + "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) + lidFile.text = encoder.encode(entry) + lidFile2.text = encoder.encode(entry2) def expectedOutput = '''diff --git 12345 67890 --- 12345 +++ 67890 @@ -361,9 +361,9 @@ class CidCommandImplTest extends Specification{ "algorithm": 
"nextflow", "mode": "standard" }, -- "source": "cid://123987/file.bam", -+ "source": "cid://123987/file2.bam", - "workflowRun": "cid://123987/", +- "source": "lid://123987/file.bam", ++ "source": "lid://123987/file2.bam", + "workflowRun": "lid://123987/", "taskRun": null, - "size": 1234, + "size": 1235, @@ -373,7 +373,7 @@ class CidCommandImplTest extends Specification{ ''' when: - new CidCommandImpl().diff(configMap, ["cid://12345", "cid://67890"]) + new LinCommandImpl().diff(configMap, ["lid://12345", "lid://67890"]) def stdout = capture .toString() .readLines()// remove the log part @@ -387,17 +387,17 @@ class CidCommandImplTest extends Specification{ def 'should print error if no entry found diff'(){ given: - def cidFile = storeLocation.resolve(".meta/12345/.data.json") - Files.createDirectories(cidFile.parent) - def encoder = new CidEncoder().withPrettyPrint(true) + def lidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) - cidFile.text = encoder.encode(entry) + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) + lidFile.text = encoder.encode(entry) when: - new CidCommandImpl().diff(configMap, ["cid://89012", "cid://12345"]) - new CidCommandImpl().diff(configMap, ["cid://12345", "cid://67890"]) + new LinCommandImpl().diff(configMap, ["lid://89012", "lid://12345"]) + new LinCommandImpl().diff(configMap, ["lid://12345", "lid://67890"]) def stdout = capture .toString() .readLines()// remove the log part @@ -407,17 +407,17 @@ class CidCommandImplTest extends Specification{ then: stdout.size() == 2 - stdout[0] == "No entry found for cid://89012." - stdout[1] == "No entry found for cid://67890." 
+ stdout[0] == "No entry found for lid://89012." + stdout[1] == "No entry found for lid://67890." } def 'should print error store is not found in diff'(){ when: def config = new ConfigMap() - new CidCommandImpl().log(config) - new CidCommandImpl().show(config,["cid:///?type=DataOutput"]) - new CidCommandImpl().lineage(config,["cid://12345", "output.html"]) - new CidCommandImpl().diff(config, ["cid://89012", "cid://12345"]) + new LinCommandImpl().log(config) + new LinCommandImpl().describe(config, ["lid:///?type=DataOutput"]) + new LinCommandImpl().render(config, ["lid://12345", "output.html"]) + new LinCommandImpl().diff(config, ["lid://89012", "lid://12345"]) def stdout = capture .toString() @@ -425,7 +425,7 @@ class CidCommandImplTest extends Specification{ .findResults { line -> !line.contains('DEBUG') ? line : null } .findResults { line -> !line.contains('INFO') ? line : null } .findResults { line -> !line.contains('plugin') ? line : null } - def expectedOutput = "Error CID store not loaded. Check Nextflow configuration." + def expectedOutput = "Error lineage store not loaded. Check Nextflow configuration." 
then: stdout.size() == 4 stdout[0] == expectedOutput @@ -436,22 +436,22 @@ class CidCommandImplTest extends Specification{ def 'should find metadata descriptions'(){ given: - def cidFile = storeLocation.resolve(".meta/123987/file.bam/.data.json") - Files.createDirectories(cidFile.parent) - def cidFile2 = storeLocation.resolve(".meta/123987/file2.bam/.data.json") - Files.createDirectories(cidFile2.parent) - def encoder = new CidEncoder().withPrettyPrint(true) + def lidFile = storeLocation.resolve(".meta/123987/file.bam/.data.json") + Files.createDirectories(lidFile.parent) + def lidFile2 = storeLocation.resolve(".meta/123987/file2.bam/.data.json") + Files.createDirectories(lidFile2.parent) + def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), - "cid://123987/file.bam", "cid://123987/", null, 1234, time, time, null) + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), - "cid://123987/file2.bam", "cid://123987/", null, 1235, time, time, null) - def expectedOutput1 = '[\n "cid://123987/file.bam",\n "cid://123987/file2.bam"\n]' - def expectedOutput2 = '[\n "cid://123987/file2.bam",\n "cid://123987/file.bam"\n]' - cidFile.text = encoder.encode(entry) - cidFile2.text = encoder.encode(entry2) + "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) + def expectedOutput1 = '[\n "lid://123987/file.bam",\n "lid://123987/file2.bam"\n]' + def expectedOutput2 = '[\n "lid://123987/file2.bam",\n "lid://123987/file.bam"\n]' + lidFile.text = encoder.encode(entry) + lidFile2.text = encoder.encode(entry2) when: - new CidCommandImpl().find(configMap, ["type=DataOutput"]) + new LinCommandImpl().find(configMap, ["type=DataOutput"]) def stdout = capture .toString() .readLines()// remove 
the log part diff --git a/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/config/LineageConfigTest.groovy similarity index 80% rename from modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/config/LineageConfigTest.groovy index 7b3e8a7944..648b5d20a2 100644 --- a/modules/nf-cid/src/test/nextflow/data/config/DataConfigTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/config/LineageConfigTest.groovy @@ -14,7 +14,7 @@ * limitations under the License. */ -package nextflow.data.config +package nextflow.lineage.config import spock.lang.Specification @@ -22,11 +22,11 @@ import spock.lang.Specification * * @author Paolo Di Tommaso */ -class DataConfigTest extends Specification { +class LineageConfigTest extends Specification { def 'should create default config' () { when: - def config = new DataConfig(Map.of()) + def config = new LineageConfig(Map.of()) then: !config.enabled !config.store.location @@ -34,7 +34,7 @@ class DataConfigTest extends Specification { def 'should create default with enable' () { when: - def config = new DataConfig([enabled: true]) + def config = new LineageConfig([enabled: true]) then: config.enabled !config.store.location @@ -42,7 +42,7 @@ class DataConfigTest extends Specification { def 'should create data config with location' () { when: - def config = new DataConfig(enabled: true, store: [location: "/some/data/store"]) + def config = new LineageConfig(enabled: true, store: [location: "/some/data/store"]) then: config.enabled config.store.location == '/some/data/store' diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy similarity index 63% rename from modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy rename to 
modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy index 673146c2e4..97b67dccc2 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidFileSystemProviderTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy @@ -14,9 +14,9 @@ * limitations under the License. */ -package nextflow.data.cid.fs +package nextflow.lineage.fs -import nextflow.data.cid.DefaultCidStore +import nextflow.lineage.DefaultLinStore import spock.lang.Shared import java.nio.ByteBuffer @@ -35,10 +35,10 @@ import nextflow.Session import spock.lang.Specification /** - * CID File system provider tests + * LID File system provider tests * @author Jorge Ejarque */ -class CidFileSystemProviderTest extends Specification { +class LinFileSystemProviderTest extends Specification { @Shared def wdir = Files.createTempDirectory('wdir') @Shared def meta = wdir.resolve('.meta') @@ -53,78 +53,78 @@ class CidFileSystemProviderTest extends Specification { wdir.deleteDir() } - def 'should return cid scheme' () { + def 'should return lid scheme' () { given: - def provider = new CidFileSystemProvider() + def provider = new LinFileSystemProvider() expect: - provider.getScheme() == 'cid' + provider.getScheme() == 'lid' } - def 'should get cid path' () { + def 'should get lid path' () { given: - def cid = Mock(CidPath) + def lid = Mock(LinPath) and: - def provider = new CidFileSystemProvider() + def provider = new LinFileSystemProvider() expect: - provider.toCidPath(cid) == cid + provider.toLinPath(lid) == lid when: - provider.toCidPath(Path.of('foo')) + provider.toLinPath(Path.of('foo')) then: thrown(ProviderMismatchException) } def 'should create new file system' () { given: - def provider = new CidFileSystemProvider() + def provider = new LinFileSystemProvider() def config = [store:[location:data.toString()]] - def cid = CidPath.asUri('cid://12345') + def lid = LinPath.asUri('lid://12345') when: - def fs = provider.newFileSystem(cid, config) 
as CidFileSystem + def fs = provider.newFileSystem(lid, config) as LinFileSystem then: - (fs.cidStore as DefaultCidStore).location == data + (fs.store as DefaultLinStore).location == data } def 'should get a file system' () { given: - def provider = new CidFileSystemProvider() + def provider = new LinFileSystemProvider() def config = [store:[location: data.toString()]] - def uri = CidPath.asUri('cid://12345') + def uri = LinPath.asUri('lid://12345') when: provider.getFileSystem(uri) then: thrown(FileSystemNotFoundException) when: - provider.newFileSystem(uri, config) as CidFileSystem + provider.newFileSystem(uri, config) as LinFileSystem and: - def fs = provider.getFileSystem(uri) as CidFileSystem + def fs = provider.getFileSystem(uri) as LinFileSystem then: - (fs.cidStore as DefaultCidStore).location == data + (fs.store as DefaultLinStore).location == data } def 'should get or create a file system' () { given: - def config = [workflow:[data:[store:[location: data.toString()]]]] + def config = [workflow:[lineage:[store:[location: data.toString()]]]] Global.session = Mock(Session) { getConfig()>>config } and: - def uri = CidPath.asUri('cid://12345') - def provider = new CidFileSystemProvider() + def uri = LinPath.asUri('lid://12345') + def provider = new LinFileSystemProvider() when: - def fs = provider.getFileSystemOrCreate(uri) as CidFileSystem + def fs = provider.getFileSystemOrCreate(uri) as LinFileSystem then: - (fs.cidStore as DefaultCidStore).location == data + (fs.store as DefaultLinStore).location == data when: - def fs2 = provider.getFileSystemOrCreate(uri) as CidFileSystem + def fs2 = provider.getFileSystemOrCreate(uri) as LinFileSystem then: fs2.is(fs) } def 'should create new byte channel' () { given: - def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] def outputMeta = meta.resolve("12345/output.txt") def output = data.resolve("output.txt") output.text = "Hello, 
World!" @@ -133,11 +133,11 @@ class CidFileSystemProviderTest extends Specification { Global.session = Mock(Session) { getConfig()>>config } and: - def provider = new CidFileSystemProvider() - def cid = provider.getPath(CidPath.asUri('cid://12345/output.txt')) + def provider = new LinFileSystemProvider() + def lid = provider.getPath(LinPath.asUri('lid://12345/output.txt')) def opts = Set.of(StandardOpenOption.READ) when: - def channel = provider.newByteChannel(cid, opts) + def channel = provider.newByteChannel(lid, opts) then: channel.isOpen() channel.position() == 0 @@ -165,12 +165,12 @@ class CidFileSystemProviderTest extends Specification { thrown(NonWritableChannelException) when: - provider.newByteChannel(cid, Set.of(StandardOpenOption.WRITE)) + provider.newByteChannel(lid, Set.of(StandardOpenOption.WRITE)) then: thrown(UnsupportedOperationException) when: - provider.newByteChannel(cid, Set.of(StandardOpenOption.APPEND)) + provider.newByteChannel(lid, Set.of(StandardOpenOption.APPEND)) then: thrown(UnsupportedOperationException) @@ -180,20 +180,20 @@ class CidFileSystemProviderTest extends Specification { output.delete() } - def 'should create new byte channel for CidMetadata' () { + def 'should create new byte channel for LinMetadata' () { given: - def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] def outputMeta = meta.resolve("12345") outputMeta.mkdirs() outputMeta.resolve(".data.json").text = '{"type":"WorkflowRun","sessionId":"session","name":"run_name","params":[{"type":"String","name":"param1","value":"value1"}]}' Global.session = Mock(Session) { getConfig()>>config } and: - def provider = new CidFileSystemProvider() - def cid = provider.getPath(CidPath.asUri('cid://12345#name')) + def provider = new LinFileSystemProvider() + def lid = provider.getPath(LinPath.asUri('lid://12345#name')) when: - def channel = provider.newByteChannel(cid, Set.of(StandardOpenOption.READ)) 
+ def channel = provider.newByteChannel(lid, Set.of(StandardOpenOption.READ)) then: channel.isOpen() channel.position() == 0 @@ -223,12 +223,12 @@ class CidFileSystemProviderTest extends Specification { thrown(NonWritableChannelException) when: - provider.newByteChannel(cid, Set.of(StandardOpenOption.WRITE)) + provider.newByteChannel(lid, Set.of(StandardOpenOption.WRITE)) then: thrown(UnsupportedOperationException) when: - provider.newByteChannel(cid, Set.of(StandardOpenOption.APPEND)) + provider.newByteChannel(lid, Set.of(StandardOpenOption.APPEND)) then: thrown(UnsupportedOperationException) @@ -237,9 +237,9 @@ class CidFileSystemProviderTest extends Specification { outputMeta.deleteDir() } - def 'should read cid' () { + def 'should read lid' () { given: - def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] def outputMeta = meta.resolve("12345/output.txt") def output = data.resolve("output.txt") output.text = "Hello, World!" @@ -248,12 +248,12 @@ class CidFileSystemProviderTest extends Specification { Global.session = Mock(Session) { getConfig()>>config } and: - def provider = new CidFileSystemProvider() - def cid = provider.getPath(CidPath.asUri('cid://12345/output.txt')) + def provider = new LinFileSystemProvider() + def lid = provider.getPath(LinPath.asUri('lid://12345/output.txt')) def opts = Set.of(StandardOpenOption.READ) expect: - cid.text == "Hello, World!" + lid.text == "Hello, World!" 
cleanup: outputMeta.deleteDir() @@ -262,14 +262,14 @@ class CidFileSystemProviderTest extends Specification { def 'should not create a directory' () { given: - def config = [workflow:[data:[store:[location:'test']]]] + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] Global.session = Mock(Session) { getConfig()>>config } and: - def provider = new CidFileSystemProvider() - def cid = provider.getPath(CidPath.asUri('cid://12345')) + def provider = new LinFileSystemProvider() + def lid = provider.getPath(LinPath.asUri('lid://12345')) when: - provider.createDirectory(cid) + provider.createDirectory(lid) then: thrown(UnsupportedOperationException) @@ -288,33 +288,33 @@ class CidFileSystemProviderTest extends Specification { meta.resolve('12345/output1/.data.json').text = '{"type":"DataOutput", "path": "' + output1.toString() + '"}' and: - def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] Global.session = Mock(Session) { getConfig()>>config } and: - def provider = new CidFileSystemProvider() - def cid = provider.getPath(CidPath.asUri('cid://12345/output1')) - def cid2 = provider.getPath(CidPath.asUri('cid://12345')) + def provider = new LinFileSystemProvider() + def lid = provider.getPath(LinPath.asUri('lid://12345/output1')) + def lid2 = provider.getPath(LinPath.asUri('lid://12345')) expect: - Files.exists(cid) - Files.exists(cid.resolve('file1.txt')) - Files.exists(cid.resolve('file2.txt')) - Files.exists(cid.resolve('file3.txt')) + Files.exists(lid) + Files.exists(lid.resolve('file1.txt')) + Files.exists(lid.resolve('file2.txt')) + Files.exists(lid.resolve('file3.txt')) when: - provider.newDirectoryStream(cid2, (p) -> true) + provider.newDirectoryStream(lid2, (p) -> true) then: thrown(FileNotFoundException) when: - def stream = provider.newDirectoryStream(cid, (p) -> true) + def stream = provider.newDirectoryStream(lid, (p) -> true) and: def result = 
stream.toList() then: result.toSet() == [ - cid.resolve('file1.txt'), - cid.resolve('file2.txt'), - cid.resolve('file3.txt') + lid.resolve('file1.txt'), + lid.resolve('file2.txt'), + lid.resolve('file3.txt') ] as Set cleanup: @@ -325,14 +325,14 @@ class CidFileSystemProviderTest extends Specification { def 'should not delete a file' () { given: - def config = [workflow:[data:[store:[location:'test']]]] + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] Global.session = Mock(Session) { getConfig()>>config } and: - def provider = new CidFileSystemProvider() - def cid = provider.getPath(CidPath.asUri('cid://12345')) + def provider = new LinFileSystemProvider() + def lid = provider.getPath(LinPath.asUri('lid://12345')) when: - provider.delete(cid) + provider.delete(lid) then: thrown(UnsupportedOperationException) @@ -340,30 +340,30 @@ class CidFileSystemProviderTest extends Specification { def 'should not copy a file' () { given: - def config = [workflow:[data:[store:[location:'test']]]] + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] Global.session = Mock(Session) { getConfig()>>config } and: - def provider = new CidFileSystemProvider() - def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) - def cid2 = provider.getPath(CidPath.asUri('cid://54321/foo')) + def provider = new LinFileSystemProvider() + def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc')) + def lid2 = provider.getPath(LinPath.asUri('lid://54321/foo')) when: - provider.copy(cid1, cid2) + provider.copy(lid1, lid2) then: thrown(UnsupportedOperationException) } def 'should not move a file' () { given: - def config = [workflow:[data:[store:[location:'test']]]] + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] Global.session = Mock(Session) { getConfig()>>config } and: - def provider = new CidFileSystemProvider() - def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) - def cid2 = 
provider.getPath(CidPath.asUri('cid://54321/foo')) + def provider = new LinFileSystemProvider() + def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc')) + def lid2 = provider.getPath(LinPath.asUri('lid://54321/foo')) when: - provider.move(cid1, cid2) + provider.move(lid1, lid2) then: thrown(UnsupportedOperationException) } @@ -371,19 +371,19 @@ class CidFileSystemProviderTest extends Specification { def 'should check is same file' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[store:[location:folder.toString()]]]] + def config = [workflow:[lineage:[store:[location:folder.toString()]]]] Global.session = Mock(Session) { getConfig()>>config } and: - def provider = new CidFileSystemProvider() - def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) - def cid2 = provider.getPath(CidPath.asUri('cid://54321/foo')) - def cid3 = provider.getPath(CidPath.asUri('cid://54321/foo')) + def provider = new LinFileSystemProvider() + def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc')) + def lid2 = provider.getPath(LinPath.asUri('lid://54321/foo')) + def lid3 = provider.getPath(LinPath.asUri('lid://54321/foo')) expect: - !provider.isSameFile(cid1, cid2) - !provider.isSameFile(cid1, cid3) + !provider.isSameFile(lid1, lid2) + !provider.isSameFile(lid1, lid3) and: - provider.isSameFile(cid2, cid3) + provider.isSameFile(lid2, lid3) cleanup: folder?.deleteDir() @@ -392,7 +392,7 @@ class CidFileSystemProviderTest extends Specification { def 'should check is hidden file' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] Global.session = Mock(Session) { getConfig()>>config } and: def output = folder.resolve('path') @@ -402,13 +402,13 @@ class CidFileSystemProviderTest extends Specification { meta.resolve('12345/output').mkdirs() meta.resolve('12345/output/.data.json').text = 
'{"type":"DataOutput", "path": "' + output.toString() + '"}' and: - def provider = new CidFileSystemProvider() - def cid1 = provider.getPath(CidPath.asUri('cid://12345/output/abc')) - def cid2 = provider.getPath(CidPath.asUri('cid://12345/output/.foo')) + def provider = new LinFileSystemProvider() + def lid1 = provider.getPath(LinPath.asUri('lid://12345/output/abc')) + def lid2 = provider.getPath(LinPath.asUri('lid://12345/output/.foo')) expect: - !provider.isHidden(cid1) - provider.isHidden(cid2) + !provider.isHidden(lid1) + provider.isHidden(lid2) cleanup: folder?.deleteDir() @@ -416,18 +416,18 @@ class CidFileSystemProviderTest extends Specification { def 'should read file attributes' () { given: - def config = [workflow:[data:[store:[location:wdir.toString()]]]] + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] def file = data.resolve('abc') file.text = 'Hello' meta.resolve('12345/abc').mkdirs() meta.resolve('12345/abc/.data.json').text = '{"type":"DataOutput", "path": "' + file.toString() + '"}' Global.session = Mock(Session) { getConfig()>>config } and: - def provider = new CidFileSystemProvider() - def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + def provider = new LinFileSystemProvider() + def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc')) when: - def attr1 = provider.readAttributes(cid1, BasicFileAttributes) + def attr1 = provider.readAttributes(lid1, BasicFileAttributes) def real1= Files.readAttributes(file,BasicFileAttributes) then: !attr1.directory @@ -444,7 +444,9 @@ class CidFileSystemProviderTest extends Specification { def 'should throw exception in unsupported methods'() { given: - def provider = new CidFileSystemProvider() + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + Global.session = Mock(Session) { getConfig()>>config } + def provider = new LinFileSystemProvider() when: provider.newOutputStream(null) @@ -469,17 +471,19 @@ class CidFileSystemProviderTest extends 
Specification { def 'should throw exception when checking access mode'(){ given: - def provider = new CidFileSystemProvider() - def cid1 = provider.getPath(CidPath.asUri('cid://12345/abc')) + def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + Global.session = Mock(Session) { getConfig()>>config } + def provider = new LinFileSystemProvider() + def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc')) when: - provider.checkAccess(cid1, AccessMode.WRITE) + provider.checkAccess(lid1, AccessMode.WRITE) then: def ex1 = thrown(AccessDeniedException) ex1.message == "Write mode not supported" when: - provider.checkAccess(cid1, AccessMode.EXECUTE) + provider.checkAccess(lid1, AccessMode.EXECUTE) then: def ex2 = thrown(AccessDeniedException) ex2.message == "Execute mode not supported" diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CifPathFactoryTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathFactoryTest.groovy similarity index 63% rename from modules/nf-cid/src/test/nextflow/data/cid/fs/CifPathFactoryTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathFactoryTest.groovy index 18cb2e0b8c..65b6318dac 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CifPathFactoryTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathFactoryTest.groovy @@ -14,7 +14,7 @@ * limitations under the License. */ -package nextflow.data.cid.fs +package nextflow.lineage.fs import java.nio.file.Files import java.nio.file.Path @@ -25,17 +25,17 @@ import spock.lang.Specification import spock.lang.Unroll /** - * CID Path Factory tests. + * LID Path Factory tests. 
* * @author Jorge Ejarque */ -class CifPathFactoryTest extends Specification { +class LinPathFactoryTest extends Specification { Path tmp def setup() { tmp = Files.createTempDirectory("data") - Global.session = Mock(Session) { getConfig()>> [workflow:[data:[store:[location: tmp.toString()]]]] } + Global.session = Mock(Session) { getConfig()>> [workflow:[lineage:[store:[location: tmp.toString()]]]] } } def cleanup() { @@ -43,47 +43,47 @@ class CifPathFactoryTest extends Specification { tmp.deleteDir() } - def 'should create cid path' () { + def 'should create lin path' () { given: - def factory = new CidPathFactory() + def factory = new LinPathFactory() expect: factory.parseUri('foo') == null when: - def p1 = factory.parseUri('cid://12345') + def p1 = factory.parseUri('lid://12345') then: - p1.toUriString() == 'cid://12345' + p1.toUriString() == 'lid://12345' when: - def p2 = factory.parseUri('cid://12345/x/y/z') + def p2 = factory.parseUri('lid://12345/x/y/z') then: - p2.toUriString() == 'cid://12345/x/y/z' + p2.toUriString() == 'lid://12345/x/y/z' when: - def p3 = factory.parseUri('cid://12345//x///y/z//') + def p3 = factory.parseUri('lid://12345//x///y/z//') then: - p3.toUriString() == 'cid://12345/x/y/z' + p3.toUriString() == 'lid://12345/x/y/z' when: - factory.parseUri('cid:///12345') + factory.parseUri('lid:///12345') then: thrown(IllegalArgumentException) } @Unroll - def 'should convert get cid uri string' () { + def 'should convert get lid uri string' () { given: - def factory = new CidPathFactory() + def factory = new LinPathFactory() when: - def cid = CidPathFactory.create(EXPECTED) + def lid = LinPathFactory.create(EXPECTED) then: - factory.toUriString(cid) == EXPECTED + factory.toUriString(lid) == EXPECTED where: _ | EXPECTED - _ | 'cid://123' - _ | 'cid://123/a/b/c' + _ | 'lid://123' + _ | 'lid://123/a/b/c' } } diff --git a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy 
b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy similarity index 57% rename from modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy index 7931da5de4..7bab762441 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/fs/CidPathTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy @@ -14,17 +14,16 @@ * limitations under the License. */ -package nextflow.data.cid.fs - -import nextflow.data.cid.CidUtils -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutputs -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.WorkflowRun -import nextflow.data.cid.serde.CidEncoder -import nextflow.file.FileHelper +package nextflow.lineage.fs + +import nextflow.lineage.LinUtils +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.WorkflowRun +import nextflow.lineage.serde.LinEncoder import nextflow.util.CacheHelper import org.junit.Rule import test.OutputCapture @@ -40,26 +39,36 @@ import spock.lang.Unroll import java.time.OffsetDateTime /** - * CID Path Tests + * LID Path Tests * @author Jorge Ejarque */ -class CidPathTest extends Specification { +class LinPathTest extends Specification { - @Shared def wdir = Files.createTempDirectory('wdir') - @Shared def cid = wdir.resolve('.meta') - @Shared def data = wdir.resolve('work') - @Shared def fs = Mock(CidFileSystem) + @Shared + Path wdir + @Shared + Path meta + @Shared + Path data + @Shared + def fs = Mock(LinFileSystem) - @Rule - OutputCapture capture = new OutputCapture() + def setupSpec(){ + wdir = Files.createTempDirectory("wdir") + meta = wdir.resolve('.meta') + data = 
wdir.resolve('work') + } def cleanupSpec(){ wdir.deleteDir() } + @Rule + OutputCapture capture = new OutputCapture() + def 'should create from URI' () { when: - def path = new CidPath(fs, new URI( URI_STRING )) + def path = new LinPath(fs, new URI( URI_STRING )) then: path.filePath == PATH path.fragment == FRAGMENT @@ -67,19 +76,19 @@ class CidPathTest extends Specification { where: URI_STRING | PATH | QUERY | FRAGMENT - "cid://1234/hola" | "1234/hola" | null | null - "cid://1234/hola#frag.sub" | "1234/hola" | null | "frag.sub" - "cid://1234/#frag.sub" | "1234" | null | "frag.sub" - "cid://1234/?q=a&b=c" | "1234" | "q=a&b=c" | null - "cid://1234/?q=a&b=c#frag.sub" | "1234" | "q=a&b=c" | "frag.sub" - "cid:///" | "/" | null | null + "lid://1234/hola" | "1234/hola" | null | null + "lid://1234/hola#frag.sub" | "1234/hola" | null | "frag.sub" + "lid://1234/#frag.sub" | "1234" | null | "frag.sub" + "lid://1234/?q=a&b=c" | "1234" | "q=a&b=c" | null + "lid://1234/?q=a&b=c#frag.sub" | "1234" | "q=a&b=c" | "frag.sub" + "lid:///" | "/" | null | null } - def 'should create correct cid Path' () { + def 'should create correct lid Path' () { when: - def cid = new CidPath(FS, PATH, MORE) + def lid = new LinPath(FS, PATH, MORE) then: - cid.filePath == EXPECTED_FILE + lid.filePath == EXPECTED_FILE where: FS | PATH | MORE | EXPECTED_FILE fs | '/' | [] as String[] | '/' @@ -131,135 +140,134 @@ class CidPathTest extends Specification { def outputFile = data.resolve('file2.txt') outputFile.text = "this is file2" - def cidFs = new FileHelper().getOrCreateFileSystemFor('cid', [enabled: true, store: [location: cid.parent.toString()]] ) as CidFileSystem + def lidFs = new LinFileSystemProvider().newFileSystem(new URI("lid:///"), [enabled: true, store: [location: wdir.toString()]]) - cid.resolve('12345/output1').mkdirs() - cid.resolve('12345/path/to/file2.txt').mkdirs() - cid.resolve('12345/.data.json').text = '{"type":"TaskRun"}' - cid.resolve('12345/output1/.data.json').text = 
'{"type":"DataOutput", "path": "' + outputFolder.toString() + '"}' - cid.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"DataOutput", "path": "' + outputFile.toString() + '"}' + meta.resolve('12345/output1').mkdirs() + meta.resolve('12345/path/to/file2.txt').mkdirs() + meta.resolve('12345/.data.json').text = '{"type":"TaskRun"}' + meta.resolve('12345/output1/.data.json').text = '{"type":"DataOutput", "path": "' + outputFolder.toString() + '"}' + meta.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"DataOutput", "path": "' + outputFile.toString() + '"}' def time = OffsetDateTime.now() - def wfResultsMetadata = new CidEncoder().withPrettyPrint(true).encode(new WorkflowOutputs(time, "cid://1234", [new Parameter( "Path", "a", "cid://1234/a.txt")])) - cid.resolve('5678/').mkdirs() - cid.resolve('5678/.data.json').text = wfResultsMetadata + def wfResultsMetadata = new LinEncoder().withPrettyPrint(true).encode(new WorkflowOutputs(time, "lid://1234", [new Parameter( "Path", "a", "lid://1234/a.txt")])) + meta.resolve('5678/').mkdirs() + meta.resolve('5678/.data.json').text = wfResultsMetadata - expect: 'Get real path when CidPath is the output data or a subfolder' - new CidPath(cidFs, '12345/output1').getTargetPath() == outputFolder - new CidPath(cidFs,'12345/output1/some/path').getTargetPath() == outputSubFolder - new CidPath(cidFs,'12345/output1/some/path/file1.txt').getTargetPath().text == outputSubFolderFile.text - new CidPath(cidFs, '12345/path/to/file2.txt').getTargetPath().text == outputFile.text + expect: 'Get real path when LinPath is the output data or a subfolder' + new LinPath(lidFs, '12345/output1').getTargetPath() == outputFolder + new LinPath(lidFs,'12345/output1/some/path').getTargetPath() == outputSubFolder + new LinPath(lidFs,'12345/output1/some/path/file1.txt').getTargetPath().text == outputSubFolderFile.text + new LinPath(lidFs, '12345/path/to/file2.txt').getTargetPath().text == outputFile.text - when: 'CidPath fs is null' - 
new CidPath(null, '12345').getTargetPath() + when: 'LinPath fs is null' + new LinPath(null, '12345').getTargetPath() then: thrown(IllegalArgumentException) - when: 'CidPath is empty' - new CidPath(cidFs, '/').getTargetPath() + when: 'LinPath is empty' + new LinPath(lidFs, '/').getTargetPath() then: thrown(IllegalArgumentException) - when: 'CidPath is not an output data description' - new CidPath(cidFs, '12345').getTargetPath() + when: 'LinPath is not an output data description' + new LinPath(lidFs, '12345').getTargetPath() then: thrown(FileNotFoundException) - when: 'CidPath is not subfolder of an output data description' - new CidPath(cidFs, '12345/path').getTargetPath() + when: 'LinPath is not subfolder of an output data description' + new LinPath(lidFs, '12345/path').getTargetPath() then: thrown(FileNotFoundException) - when: 'CidPath subfolder of an output data description does not exist' - new CidPath(cidFs, '12345/output1/other/path').getTargetPath() + when: 'LinPath subfolder of an output data description does not exist' + new LinPath(lidFs, '12345/output1/other/path').getTargetPath() then: thrown(FileNotFoundException) - when: 'Cid does not exist' - new CidPath(cidFs, '23456').getTargetPath() + when: 'Lid does not exist' + new LinPath(lidFs, '23456').getTargetPath() then: thrown(FileNotFoundException) - when: 'Cid description' - def result = new CidPath(cidFs, '5678').getTargetOrMetadataPath() + when: 'Lid description' + def result = new LinPath(lidFs, '5678').getTargetOrMetadataPath() then: - result instanceof CidMetadataPath + result instanceof LinMetadataPath result.text == wfResultsMetadata - when: 'Cid description subobject' - def result2 = new CidPath(cidFs, '5678#outputs').getTargetOrMetadataPath() + when: 'Lid description subobject' + def result2 = new LinPath(lidFs, '5678#outputs').getTargetOrMetadataPath() then: - result2 instanceof CidMetadataPath - result2.text == CidUtils.encodeSearchOutputs([new Parameter("Path","a", "cid://1234/a.txt")], 
true) + result2 instanceof LinMetadataPath + result2.text == LinUtils.encodeSearchOutputs([new Parameter("Path","a", "lid://1234/a.txt")], true) - when: 'Cid subobject does not exist' - new CidPath(cidFs, '23456#notexists').getTargetOrMetadataPath() + when: 'Lid subobject does not exist' + new LinPath(lidFs, '23456#notexists').getTargetOrMetadataPath() then: thrown(IllegalArgumentException) - - cleanup: - cid.resolve('12345').deleteDir() - } def 'should get subobjects as path' (){ given: - def cidFs = new FileHelper().getOrCreateFileSystemFor('cid', [enabled: true, store: [location: cid.parent.toString()]] ) as CidFileSystem + def lidFs = new LinFileSystemProvider().newFileSystem(new URI("lid:///"), [enabled: true, store: [location: wdir.toString()]]) def wf = new WorkflowRun(new Workflow([],"repo", "commit"), "sessionId", "runId", [new Parameter("String", "param1", "value1")]) when: 'workflow repo in workflow run' - Path p = CidPath.getMetadataAsTargetPath(wf, cidFs, "12345", ["workflow", "repository"] as String[]) + Path p = LinPath.getMetadataAsTargetPath(wf, lidFs, "123456", ["workflow", "repository"] as String[]) then: - p instanceof CidMetadataPath + p instanceof LinMetadataPath p.text == '"repo"' when: 'outputs' - def outputs = new WorkflowOutputs(OffsetDateTime.now(), "cid://12345", [ new Parameter("Collection", "samples", ["sample1", "sample2"])]) - cidFs.cidStore.save("12345/outputs", outputs) - Path p2 = CidPath.getMetadataAsTargetPath(wf, cidFs, "12345", ["outputs"] as String[]) + def outputs = new WorkflowOutputs(OffsetDateTime.now(), "lid://123456", [ new Parameter("Collection", "samples", ["sample1", "sample2"])]) + lidFs.store.save("123456/outputs", outputs) + Path p2 = LinPath.getMetadataAsTargetPath(wf, lidFs, "123456", ["outputs"] as String[]) then: - p2 instanceof CidMetadataPath - p2.text == CidUtils.encodeSearchOutputs([new Parameter("Collection", "samples", ["sample1", "sample2"])], true) + p2 instanceof LinMetadataPath + p2.text == 
LinUtils.encodeSearchOutputs([new Parameter("Collection", "samples", ["sample1", "sample2"])], true) when: 'child does not exists' - CidPath.getMetadataAsTargetPath(wf, cidFs, "12345", ["no-exist"] as String[]) + LinPath.getMetadataAsTargetPath(wf, lidFs, "123456", ["no-exist"] as String[]) then: def exception = thrown(FileNotFoundException) - exception.message == "Target path '12345#no-exist' does not exist." + exception.message == "Target path '123456#no-exist' does not exist." when: 'outputs does not exists' - CidPath.getMetadataAsTargetPath(wf, cidFs, "6789", ["outputs"] as String[]) + LinPath.getMetadataAsTargetPath(wf, lidFs, "6789", ["outputs"] as String[]) then: def exception1 = thrown(FileNotFoundException) exception1.message == "Target path '6789#outputs' does not exist." when: 'null object' - CidPath.getMetadataAsTargetPath(null, cidFs, "12345", ["no-exist"] as String[]) + LinPath.getMetadataAsTargetPath(null, lidFs, "123456", ["no-exist"] as String[]) then: def exception2 = thrown(FileNotFoundException) - exception2.message == "Target path '12345' does not exist." + exception2.message == "Target path '123456' does not exist." 
+ + cleanup: + meta.resolve("123456").deleteDir() } def 'should get file name' () { when: - def cid1 = new CidPath(fs, '1234567890/this/file.bam') + def lid1 = new LinPath(fs, '1234567890/this/file.bam') then: - cid1.getFileName() == new CidPath(null, 'file.bam') + lid1.getFileName() == new LinPath(null, 'file.bam') } def 'should get file parent' () { when: - def cid1 = new CidPath(fs, '1234567890/this/file.bam') + def lid1 = new LinPath(fs, '1234567890/this/file.bam') then: - cid1.getParent() == new CidPath(fs, '1234567890/this') - cid1.getParent().getParent() == new CidPath(fs, '1234567890') - cid1.getParent().getParent().getParent() == new CidPath(fs, "/") - cid1.getParent().getParent().getParent().getParent() == null + lid1.getParent() == new LinPath(fs, '1234567890/this') + lid1.getParent().getParent() == new LinPath(fs, '1234567890') + lid1.getParent().getParent().getParent() == new LinPath(fs, "/") + lid1.getParent().getParent().getParent().getParent() == null } @Unroll def 'should get name count' () { expect: - new CidPath(fs, PATH).getNameCount() == EXPECTED + new LinPath(fs, PATH).getNameCount() == EXPECTED where: PATH | EXPECTED '/' | 0 @@ -273,42 +281,42 @@ class CidPathTest extends Specification { @Unroll def 'should get name by index' () { expect: - new CidPath(fs, PATH).getName(INDEX) == EXPECTED + new LinPath(fs, PATH).getName(INDEX) == EXPECTED where: PATH | INDEX | EXPECTED - '123' | 0 | new CidPath(fs, '123') - '123/a' | 1 | new CidPath(null, 'a') - '123/a/' | 1 | new CidPath(null, 'a') - '123/a/b' | 2 | new CidPath(null, 'b') + '123' | 0 | new LinPath(fs, '123') + '123/a' | 1 | new LinPath(null, 'a') + '123/a/' | 1 | new LinPath(null, 'a') + '123/a/b' | 2 | new LinPath(null, 'b') } @Unroll def 'should get subpath' () { expect: - new CidPath(fs, PATH).subpath(BEGIN,END) == EXPECTED + new LinPath(fs, PATH).subpath(BEGIN,END) == EXPECTED where: PATH | BEGIN | END | EXPECTED - '123' | 0 | 1 | new CidPath(fs, '123') - '123/a' | 0 | 2 | new 
CidPath(fs, '123/a') - '123/a/' | 0 | 2 | new CidPath(fs, '123/a') - '123/a' | 1 | 2 | new CidPath(null, 'a') - '123/a/' | 1 | 2 | new CidPath(null, 'a') - '123/a/b' | 2 | 3 | new CidPath(null, 'b') - '123/a/b' | 1 | 3 | new CidPath(null, 'a/b') + '123' | 0 | 1 | new LinPath(fs, '123') + '123/a' | 0 | 2 | new LinPath(fs, '123/a') + '123/a/' | 0 | 2 | new LinPath(fs, '123/a') + '123/a' | 1 | 2 | new LinPath(null, 'a') + '123/a/' | 1 | 2 | new LinPath(null, 'a') + '123/a/b' | 2 | 3 | new LinPath(null, 'b') + '123/a/b' | 1 | 3 | new LinPath(null, 'a/b') } def 'should normalize a path' () { expect: - new CidPath(fs, '123').normalize() == new CidPath(fs, '123') - new CidPath(fs, '123/a/b').normalize() == new CidPath(fs, '123/a/b') - new CidPath(fs, '123/./a/b').normalize() == new CidPath(fs, '123/a/b') - new CidPath(fs, '123/a/../a/b').normalize() == new CidPath(fs, '123/a/b') + new LinPath(fs, '123').normalize() == new LinPath(fs, '123') + new LinPath(fs, '123/a/b').normalize() == new LinPath(fs, '123/a/b') + new LinPath(fs, '123/./a/b').normalize() == new LinPath(fs, '123/a/b') + new LinPath(fs, '123/a/../a/b').normalize() == new LinPath(fs, '123/a/b') } @Unroll def 'should validate startWith' () { expect: - new CidPath(fs,PATH).startsWith(OTHER) == EXPECTED + new LinPath(fs,PATH).startsWith(OTHER) == EXPECTED where: PATH | OTHER | EXPECTED '12345/a/b' | '12345' | true @@ -322,7 +330,7 @@ class CidPathTest extends Specification { @Unroll def 'should validate endsWith' () { expect: - new CidPath(fs,PATH).endsWith(OTHER) == EXPECTED + new LinPath(fs,PATH).endsWith(OTHER) == EXPECTED where: PATH | OTHER | EXPECTED '12345/a/b' | 'b' | true @@ -335,22 +343,22 @@ class CidPathTest extends Specification { def 'should validate isAbsolute' () { expect: - new CidPath(fs,'1234/a/b/c').isAbsolute() - new CidPath(fs,'1234/a/b/c').getRoot().isAbsolute() - new CidPath(fs,'1234/a/b/c').getParent().isAbsolute() - new CidPath(fs,'1234/a/b/c').normalize().isAbsolute() - new 
CidPath(fs,'1234/a/b/c').getName(0).isAbsolute() - new CidPath(fs,'1234/a/b/c').subpath(0,2).isAbsolute() + new LinPath(fs,'1234/a/b/c').isAbsolute() + new LinPath(fs,'1234/a/b/c').getRoot().isAbsolute() + new LinPath(fs,'1234/a/b/c').getParent().isAbsolute() + new LinPath(fs,'1234/a/b/c').normalize().isAbsolute() + new LinPath(fs,'1234/a/b/c').getName(0).isAbsolute() + new LinPath(fs,'1234/a/b/c').subpath(0,2).isAbsolute() and: - !new CidPath(fs,'1234/a/b/c').getFileName().isAbsolute() - !new CidPath(fs,'1234/a/b/c').getName(1).isAbsolute() - !new CidPath(fs,'1234/a/b/c').subpath(1,3).isAbsolute() + !new LinPath(fs,'1234/a/b/c').getFileName().isAbsolute() + !new LinPath(fs,'1234/a/b/c').getName(1).isAbsolute() + !new LinPath(fs,'1234/a/b/c').subpath(1,3).isAbsolute() } @Unroll def 'should get root path' () { expect: - new CidPath(fs,PATH).getRoot() == new CidPath(fs,EXPECTED) + new LinPath(fs,PATH).getRoot() == new LinPath(fs,EXPECTED) where: PATH | EXPECTED '12345' | '/' @@ -362,84 +370,84 @@ class CidPathTest extends Specification { BASE_PATH.relativize(PATH) == EXPECTED where : BASE_PATH | PATH | EXPECTED - new CidPath(fs, '/') | new CidPath(fs, '123/a/b/c') | new CidPath(null, '123/a/b/c') - new CidPath(fs,'123/a/') | new CidPath(fs, '123/a/b/c') | new CidPath(null, 'b/c') - new CidPath(fs,'123/a/') | new CidPath(fs, '321/a/') | new CidPath(null, '../../321/a') - new CidPath(null,'123/a') | new CidPath(null, '123/a/b/c') | new CidPath(null, 'b/c') - new CidPath(null,'123/a') | new CidPath(null, '321/a') | new CidPath(null, '../../321/a') - new CidPath(fs,'../a/') | new CidPath(fs, '321/a') | new CidPath(null, '../321/a') - new CidPath(fs,'321/a/') | new CidPath(fs, '../a') | new CidPath(null, '../../a') - new CidPath(null,'321/a/') | new CidPath(null, '../a') | new CidPath(null, '../../../a') + new LinPath(fs, '/') | new LinPath(fs, '123/a/b/c') | new LinPath(null, '123/a/b/c') + new LinPath(fs,'123/a/') | new LinPath(fs, '123/a/b/c') | new LinPath(null, 
'b/c') + new LinPath(fs,'123/a/') | new LinPath(fs, '321/a/') | new LinPath(null, '../../321/a') + new LinPath(null,'123/a') | new LinPath(null, '123/a/b/c') | new LinPath(null, 'b/c') + new LinPath(null,'123/a') | new LinPath(null, '321/a') | new LinPath(null, '../../321/a') + new LinPath(fs,'../a/') | new LinPath(fs, '321/a') | new LinPath(null, '../321/a') + new LinPath(fs,'321/a/') | new LinPath(fs, '../a') | new LinPath(null, '../../a') + new LinPath(null,'321/a/') | new LinPath(null, '../a') | new LinPath(null, '../../../a') } def 'relativize should throw exception' () { given: - def cid1 = new CidPath(fs,'123/a/') - def cid2 = new CidPath(null,'123/a/') - def cid3 = new CidPath(null, '../a/b') + def lid1 = new LinPath(fs,'123/a/') + def lid2 = new LinPath(null,'123/a/') + def lid3 = new LinPath(null, '../a/b') when: 'comparing relative with absolute' - cid1.relativize(cid2) + lid1.relativize(lid2) then: thrown(IllegalArgumentException) when: 'undefined base path' - cid3.relativize(cid2) + lid3.relativize(lid2) then: thrown(IllegalArgumentException) } def 'should resolve path' () { when: - def cid1 = new CidPath(fs, '123/a/b/c') - def cid2 = new CidPath(fs, '321/x/y/z') - def rel1 = new CidPath(null, 'foo') - def rel2 = new CidPath(null, 'bar/') + def lid1 = new LinPath(fs, '123/a/b/c') + def lid2 = new LinPath(fs, '321/x/y/z') + def rel1 = new LinPath(null, 'foo') + def rel2 = new LinPath(null, 'bar/') then: - cid1.resolve(cid2) == cid2 - cid2.resolve(cid1) == cid1 + lid1.resolve(lid2) == lid2 + lid2.resolve(lid1) == lid1 and: - cid1.resolve(rel1) == new CidPath(fs,'123/a/b/c/foo') - cid1.resolve(rel2) == new CidPath(fs,'123/a/b/c/bar') + lid1.resolve(rel1) == new LinPath(fs,'123/a/b/c/foo') + lid1.resolve(rel2) == new LinPath(fs,'123/a/b/c/bar') and: - rel1.resolve(rel2) == new CidPath(null, 'foo/bar') - rel2.resolve(rel1) == new CidPath(null, 'bar/foo') + rel1.resolve(rel2) == new LinPath(null, 'foo/bar') + rel2.resolve(rel1) == new LinPath(null, 
'bar/foo') } def 'should resolve path as string' () { given: - def pr = Mock(CidFileSystemProvider) - def cidfs = Mock(CidFileSystem){ + def pr = Mock(LinFileSystemProvider) + def lidfs = Mock(LinFileSystem){ provider() >> pr} - def cid1 = new CidPath(cidfs, '123/a/b/c') + def lid1 = new LinPath(lidfs, '123/a/b/c') expect: - cid1.resolve('x/y') == new CidPath(cidfs, '123/a/b/c/x/y') - cid1.resolve('/x/y/') == new CidPath(cidfs, '123/a/b/c/x/y') + lid1.resolve('x/y') == new LinPath(lidfs, '123/a/b/c/x/y') + lid1.resolve('/x/y/') == new LinPath(lidfs, '123/a/b/c/x/y') when: - def result = cid1.resolve('cid://321') + def result = lid1.resolve('lid://321') then: - pr.getPath(CidPath.asUri('cid://321')) >> new CidPath(cidfs, '321') + pr.getPath(LinPath.asUri('lid://321')) >> new LinPath(lidfs, '321') and: - result == new CidPath(cidfs, '321') + result == new LinPath(lidfs, '321') } - def 'should throw illegat exception when not correct scheme' (){ + def 'should throw illegal exception when not correct scheme' (){ when: 'creation' - new CidPath(fs, new URI("http://1234")) + new LinPath(fs, new URI("http://1234")) then: thrown(IllegalArgumentException) when: 'asUri' - CidPath.asUri("http://1234") + LinPath.asUri("http://1234") then: thrown(IllegalArgumentException) when: 'asUri' - CidPath.asUri("") + LinPath.asUri("") then: thrown(IllegalArgumentException) @@ -447,65 +455,65 @@ class CidPathTest extends Specification { def 'should throw provider mismatch exception when different path types' () { given: - def pr = Mock(CidFileSystemProvider) - def fs = Mock(CidFileSystem){ + def pr = Mock(LinFileSystemProvider) + def fs = Mock(LinFileSystem){ provider() >> pr} and: - def cid = new CidPath(fs, '123/a/b/c') + def lid = new LinPath(fs, '123/a/b/c') when: 'resolve with path' - cid.resolve(Path.of('d')) + lid.resolve(Path.of('d')) then: thrown(ProviderMismatchException) when: 'resolve with uri string' - cid.resolve(Path.of('http://1234')) + lid.resolve(Path.of('http://1234')) 
then: thrown(ProviderMismatchException) when: 'relativize' - cid.relativize(Path.of('d')) + lid.relativize(Path.of('d')) then: thrown(ProviderMismatchException) } def 'should throw exception for unsupported methods' () { given: - def pr = Mock(CidFileSystemProvider) - def fs = Mock(CidFileSystem){ + def pr = Mock(LinFileSystemProvider) + def fs = Mock(LinFileSystem){ provider() >> pr} and: - def cid = new CidPath(fs, '123/a/b/c') + def lid = new LinPath(fs, '123/a/b/c') when: 'to file' - cid.toFile() + lid.toFile() then: thrown(UnsupportedOperationException) when: 'register' - cid.register(null, null,null) + lid.register(null, null,null) then: thrown(UnsupportedOperationException) } def 'should throw exception for incorrect index'() { when: 'getting name with negative index' - new CidPath(fs, "1234").getName(-1) + new LinPath(fs, "1234").getName(-1) then: thrown(IllegalArgumentException) when: 'getting name with larger index tha namecount' - new CidPath(fs, "1234").getName(2) + new LinPath(fs, "1234").getName(2) then: thrown(IllegalArgumentException) when: 'getting subpath with negative index' - new CidPath(fs, "1234").subpath(-1,1) + new LinPath(fs, "1234").subpath(-1,1) then: thrown(IllegalArgumentException) when: 'getting subpath with larger index tha namecount' - new CidPath(fs, "1234").subpath(0,2) + new LinPath(fs, "1234").subpath(0,2) then: thrown(IllegalArgumentException) @@ -514,19 +522,19 @@ class CidPathTest extends Specification { @Unroll def 'should get to uri string' () { expect: - new CidPath(fs, PATH).toUriString() == EXPECTED + new LinPath(fs, PATH).toUriString() == EXPECTED where: PATH | EXPECTED - '/' | 'cid:///' - '1234' | 'cid://1234' - '1234/a/b/c' | 'cid://1234/a/b/c' - '' | 'cid:///' + '/' | 'lid:///' + '1234' | 'lid://1234' + '1234/a/b/c' | 'lid://1234/a/b/c' + '' | 'lid:///' } @Unroll def 'should get string' () { expect: - new CidPath(fs, PATH).toString() == EXPECTED + new LinPath(fs, PATH).toString() == EXPECTED where: PATH | EXPECTED '/' 
| '/' @@ -538,25 +546,25 @@ class CidPathTest extends Specification { @Unroll def 'should validate asString method'() { expect: - CidPath.asUriString(FIRST, MORE as String[]) == EXPECTED + LinPath.asUriString(FIRST, MORE as String[]) == EXPECTED where: FIRST | MORE | EXPECTED - 'foo' | [] | 'cid://foo' - 'foo/' | [] | 'cid://foo' - '/foo' | [] | 'cid://foo' + 'foo' | [] | 'lid://foo' + 'foo/' | [] | 'lid://foo' + '/foo' | [] | 'lid://foo' and: - 'a' | ['/b/'] | 'cid://a/b' - 'a' | ['/b','c'] | 'cid://a/b/c' - 'a' | ['/b','//c'] | 'cid://a/b/c' - 'a' | ['/b/c', 'd'] | 'cid://a/b/c/d' - '/a/' | ['/b/c', 'd'] | 'cid://a/b/c/d' + 'a' | ['/b/'] | 'lid://a/b' + 'a' | ['/b','c'] | 'lid://a/b/c' + 'a' | ['/b','//c'] | 'lid://a/b/c' + 'a' | ['/b/c', 'd'] | 'lid://a/b/c/d' + '/a/' | ['/b/c', 'd'] | 'lid://a/b/c/d' } @Unroll - def 'should check is cid uri string' () { + def 'should check is lid uri string' () { expect: - CidPath.isCidUri(STR) == EXPECTED + LinPath.isLidUri(STR) == EXPECTED where: STR | EXPECTED @@ -564,18 +572,18 @@ class CidPathTest extends Specification { '' | false 'foo' | false '/foo' | false - 'cid:/foo' | false - 'cid:foo' | false - 'cid/foo' | false + 'lid:/foo' | false + 'lid:foo' | false + 'lid/foo' | false and: - 'cid://' | true - 'cid:///' | true - 'cid://foo/bar' | true + 'lid://' | true + 'lid:///' | true + 'lid://foo/bar' | true } def 'should detect equals'(){ expect: - new CidPath(FS1, PATH1).equals(new CidPath(FS2, PATH2)) == EXPECTED + new LinPath(FS1, PATH1).equals(new LinPath(FS2, PATH2)) == EXPECTED where: FS1 | FS2 | PATH1 | PATH2 | EXPECTED null | fs | "12345/path" | "12345/path" | false @@ -595,7 +603,7 @@ class CidPathTest extends Specification { file.text = "this is a data file" def hash = CacheHelper.hasher(file).hash().toString() def correctData = new DataOutput(file.toString(), new Checksum(hash,"nextflow", "standard")) - CidPath.validateDataOutput(correctData) + LinPath.validateDataOutput(correctData) def stdout = capture 
.toString() .readLines()// remove the log part @@ -616,7 +624,7 @@ class CidPathTest extends Specification { file.text = "this is a data file" def hash = CacheHelper.hasher(file).hash().toString() def correctData = new DataOutput(file.toString(), new Checksum("abscd","nextflow", "standard")) - CidPath.validateDataOutput(correctData) + LinPath.validateDataOutput(correctData) def stdout = capture .toString() .readLines()// remove the log part @@ -638,7 +646,7 @@ class CidPathTest extends Specification { file.text = "this is a data file" def hash = CacheHelper.hasher(file).hash().toString() def correctData = new DataOutput(file.toString(), new Checksum(hash,"not-supported", "standard")) - CidPath.validateDataOutput(correctData) + LinPath.validateDataOutput(correctData) def stdout = capture .toString() .readLines()// remove the log part @@ -657,7 +665,7 @@ class CidPathTest extends Specification { def 'should throw exception when file not found validating hash'(){ when: def correctData = new DataOutput("not/existing/file", new Checksum("120741","nextflow", "standard")) - CidPath.validateDataOutput(correctData) + LinPath.validateDataOutput(correctData) then: thrown(FileNotFoundException) diff --git a/modules/nf-cid/src/test/nextflow/data/cid/model/ChecksumTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/model/ChecksumTest.groovy similarity index 98% rename from modules/nf-cid/src/test/nextflow/data/cid/model/ChecksumTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/model/ChecksumTest.groovy index e576c7b310..b4fb304a97 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/model/ChecksumTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/model/ChecksumTest.groovy @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package nextflow.data.cid.model +package nextflow.lineage.model import nextflow.util.CacheHelper import spock.lang.Specification diff --git a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy similarity index 81% rename from modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy rename to modules/nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy index 68456faa3d..89bd06a7a4 100644 --- a/modules/nf-cid/src/test/nextflow/data/cid/serde/CidEncoderTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy @@ -14,29 +14,29 @@ * limitations under the License. */ -package nextflow.data.cid.serde - -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.TaskOutputs -import nextflow.data.cid.model.TaskRun -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowOutputs -import nextflow.data.cid.model.WorkflowRun +package nextflow.lineage.serde + +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.TaskOutputs +import nextflow.lineage.model.TaskRun +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowRun import spock.lang.Specification import java.time.OffsetDateTime -class CidEncoderTest extends Specification{ +class LinEncoderTest extends Specification{ def 'should encode and decode Outputs'(){ given: - def encoder = new CidEncoder() + def encoder = new LinEncoder() and: def output = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), - "cid://source", "cid://workflow", "cid://task", 1234) + 
"lid://source", "lid://workflow", "lid://task", 1234) when: def encoded = encoder.encode(output) @@ -50,14 +50,14 @@ class CidEncoderTest extends Specification{ result.checksum.value == "hash_value" result.checksum.algorithm == "hash_algorithm" result.checksum.mode == "standard" - result.source == "cid://source" + result.source == "lid://source" result.size == 1234 } def 'should encode and decode WorkflowRuns'(){ given: - def encoder = new CidEncoder() + def encoder = new LinEncoder() and: def uniqueId = UUID.randomUUID() def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) @@ -85,10 +85,10 @@ class CidEncoderTest extends Specification{ def 'should encode and decode WorkflowResults'(){ given: - def encoder = new CidEncoder() + def encoder = new LinEncoder() and: def time = OffsetDateTime.now() - def wfResults = new WorkflowOutputs(time, "cid://1234", [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")]) + def wfResults = new WorkflowOutputs(time, "lid://1234", [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")]) when: def encoded = encoder.encode(wfResults) def object = encoder.decode(encoded) @@ -97,13 +97,13 @@ class CidEncoderTest extends Specification{ object instanceof WorkflowOutputs def result = object as WorkflowOutputs result.createdAt == time - result.workflowRun == "cid://1234" + result.workflowRun == "lid://1234" result.outputs == [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")] } def 'should encode and decode TaskRun'() { given: - def encoder = new CidEncoder() + def encoder = new LinEncoder() and: def uniqueId = UUID.randomUUID() def taskRun = new TaskRun( @@ -135,11 +135,11 @@ class CidEncoderTest extends Specification{ def 'should encode and decode TaskResults'(){ given: - def encoder = new CidEncoder() + def encoder = new LinEncoder() and: def time = OffsetDateTime.now() def parameter = new Parameter("a","b", "c") - def wfResults = new 
TaskOutputs("cid://1234", "cid://5678", time, [parameter], null) + def wfResults = new TaskOutputs("lid://1234", "lid://5678", time, [parameter], null) when: def encoded = encoder.encode(wfResults) def object = encoder.decode(encoded) @@ -148,22 +148,22 @@ class CidEncoderTest extends Specification{ object instanceof TaskOutputs def result = object as TaskOutputs result.createdAt == time - result.taskRun == "cid://1234" - result.workflowRun == "cid://5678" + result.taskRun == "lid://1234" + result.workflowRun == "lid://5678" result.outputs.size() == 1 result.outputs[0] == parameter } def 'object with null date attributes' () { given: - def encoder = new CidEncoder() + def encoder = new LinEncoder() and: - def wfResults = new WorkflowOutputs(null, "cid://1234") + def wfResults = new WorkflowOutputs(null, "lid://1234") when: def encoded = encoder.encode(wfResults) def object = encoder.decode(encoded) then: - encoded == '{"type":"WorkflowOutputs","createdAt":null,"workflowRun":"cid://1234","outputs":null,"annotations":null}' + encoded == '{"type":"WorkflowOutputs","createdAt":null,"workflowRun":"lid://1234","outputs":null,"annotations":null}' def result = object as WorkflowOutputs result.createdAt == null diff --git a/packing.gradle b/packing.gradle index d1ba8bc07a..c22846f31a 100644 --- a/packing.gradle +++ b/packing.gradle @@ -14,9 +14,9 @@ dependencies { api project(':nextflow') // include Ivy at runtime in order to have Grape @Grab work correctly defaultCfg "org.apache.ivy:ivy:2.5.2" - // default cfg = runtime + httpfs + cid + amazon + tower client + wave client + // default cfg = runtime + httpfs + lineage + amazon + tower client + wave client defaultCfg project(':nf-httpfs') - defaultCfg project(':nf-cid') + defaultCfg project(':nf-lineage') console project(':plugins:nf-console') google project(':plugins:nf-google') amazon project(':plugins:nf-amazon') diff --git a/plugins/nf-cid-h2/build.gradle b/plugins/nf-lineage-h2/build.gradle similarity index 94% rename 
from plugins/nf-cid-h2/build.gradle rename to plugins/nf-lineage-h2/build.gradle index 9b9f35f227..8d38fe1425 100644 --- a/plugins/nf-cid-h2/build.gradle +++ b/plugins/nf-lineage-h2/build.gradle @@ -36,7 +36,7 @@ configurations { dependencies { compileOnly project(':nextflow') - compileOnly project(':nf-cid') + compileOnly project(':nf-lineage') compileOnly 'org.slf4j:slf4j-api:2.0.16' compileOnly 'org.pf4j:pf4j:3.12.0' @@ -44,6 +44,6 @@ dependencies { api("com.zaxxer:HikariCP:5.0.1") api("org.apache.groovy:groovy-sql:4.0.26") { transitive=false } - testImplementation(project(':nf-cid')) + testImplementation(project(':nf-lineage')) testImplementation(testFixtures(project(":nextflow"))) } diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinHistoryLog.groovy similarity index 71% rename from plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy rename to plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinHistoryLog.groovy index 56aad878ea..3cb0c91c34 100644 --- a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidHistoryLog.groovy +++ b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinHistoryLog.groovy @@ -15,7 +15,7 @@ * */ -package nextflow.data.cid.h2 +package nextflow.lineage.h2 import java.sql.Timestamp @@ -24,21 +24,21 @@ import groovy.sql.GroovyRowResult import groovy.sql.Sql import groovy.transform.CompileStatic import groovy.util.logging.Slf4j -import nextflow.data.cid.CidHistoryLog -import nextflow.data.cid.CidHistoryRecord +import nextflow.lineage.LinHistoryLog +import nextflow.lineage.LinHistoryRecord /** - * Implement a {@link CidHistoryLog} based on H2 database + * Implement a {@link LinHistoryLog} based on H2 database * * @author Paolo Di Tommaso */ @Slf4j @CompileStatic -class H2CidHistoryLog implements CidHistoryLog { +class H2LinHistoryLog implements LinHistoryLog { private HikariDataSource dataSource - 
H2CidHistoryLog(HikariDataSource dataSource) { + H2LinHistoryLog(HikariDataSource dataSource) { this.dataSource = dataSource } @@ -46,7 +46,7 @@ class H2CidHistoryLog implements CidHistoryLog { void write(String name, UUID sessionId, String runCid) { try(final sql=new Sql(dataSource)) { def query = """ - INSERT INTO cid_history_record (timestamp, run_name, session_id, run_cid) + INSERT INTO lid_history_record (timestamp, run_name, session_id, run_lid) VALUES (?, ?, ?, ?) """ def timestamp = new Timestamp(System.currentTimeMillis()) // Current timestamp @@ -55,17 +55,17 @@ class H2CidHistoryLog implements CidHistoryLog { } @Override - void updateRunCid(UUID sessionId, String runCid) { + void updateRunLid(UUID sessionId, String runLid) { try(final sql=new Sql(dataSource)) { def query = """ - UPDATE cid_history_record - SET run_cid = ? + UPDATE lid_history_record + SET run_lid = ? WHERE session_id = ? """ - final count = sql.executeUpdate(query, List.of(runCid, sessionId.toString())) + final count = sql.executeUpdate(query, List.of(runLid, sessionId.toString())) if (count > 0) { - log.debug "Successfully updated run_cid for session_id: $sessionId" + log.debug "Successfully updated run_lid for session_id: $sessionId" } else { log.warn "No record found with session_id: $sessionId" @@ -74,18 +74,18 @@ class H2CidHistoryLog implements CidHistoryLog { } @Override - List getRecords() { + List getRecords() { try(final sql=new Sql(dataSource)) { - final result = new ArrayList(100) - final query = "SELECT * FROM cid_history_record " + final result = new ArrayList(100) + final query = "SELECT * FROM lid_history_record " final rows = sql.rows(query) for( GroovyRowResult row : rows ) { result.add( - new CidHistoryRecord( + new LinHistoryRecord( row.timestamp as Date, row.run_name as String, UUID.fromString(row.session_id as String), - row.run_cid as String, + row.run_lid as String, ) ) } @@ -94,17 +94,17 @@ class H2CidHistoryLog implements CidHistoryLog { } @Override - 
CidHistoryRecord getRecord(UUID sessionId) { + LinHistoryRecord getRecord(UUID sessionId) { try(final sql=new Sql(dataSource)) { - final query = "SELECT * FROM cid_history_record WHERE session_id = ?" + final query = "SELECT * FROM lid_history_record WHERE session_id = ?" final row = sql.firstRow(query, sessionId.toString()) // Convert UUID to String for query if( !row ) return null - return new CidHistoryRecord( + return new LinHistoryRecord( row.timestamp as Date, row.run_name as String, UUID.fromString(row.session_id as String), - row.run_cid as String, + row.run_lid as String, ) } } diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidPlugin.groovy b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinPlugin.groovy similarity index 89% rename from plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidPlugin.groovy rename to plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinPlugin.groovy index b1ae33d595..207f9a2d44 100644 --- a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidPlugin.groovy +++ b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinPlugin.groovy @@ -15,7 +15,7 @@ * */ -package nextflow.data.cid.h2 +package nextflow.lineage.h2 import groovy.transform.CompileStatic import nextflow.plugin.BasePlugin @@ -27,9 +27,9 @@ import org.pf4j.PluginWrapper * @author Paolo Di Tommaso */ @CompileStatic -class H2CidPlugin extends BasePlugin{ +class H2LinPlugin extends BasePlugin{ - H2CidPlugin(PluginWrapper wrapper) { + H2LinPlugin(PluginWrapper wrapper) { super(wrapper) } } diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy similarity index 75% rename from plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy rename to plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy index 9586501699..0fb590c21d 100644 --- a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStore.groovy +++ 
b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy @@ -15,7 +15,7 @@ * */ -package nextflow.data.cid.h2 +package nextflow.lineage.h2 import groovy.json.JsonSlurper @@ -25,13 +25,13 @@ import com.zaxxer.hikari.HikariDataSource import groovy.sql.Sql import groovy.transform.CompileStatic import groovy.util.logging.Slf4j -import nextflow.data.cid.CidHistoryLog -import nextflow.data.cid.CidStore -import nextflow.data.cid.CidUtils -import nextflow.data.cid.serde.CidEncoder -import nextflow.data.cid.serde.CidSerializable -import nextflow.data.config.DataConfig -import nextflow.data.config.DataStoreOpts +import nextflow.lineage.LinHistoryLog +import nextflow.lineage.LinStore +import nextflow.lineage.LinUtils +import nextflow.lineage.serde.LinEncoder +import nextflow.lineage.serde.LinSerializable +import nextflow.lineage.config.LineageConfig +import nextflow.lineage.config.LineageStoreOpts import nextflow.util.TestOnly /** * @@ -39,16 +39,16 @@ import nextflow.util.TestOnly */ @Slf4j @CompileStatic -class H2CidStore implements CidStore { +class H2LinStore implements LinStore { private HikariDataSource dataSource - private CidEncoder encoder + private LinEncoder encoder @Override - H2CidStore open(DataConfig config) { + H2LinStore open(LineageConfig config) { assert config.store.location.startsWith('jdbc:h2:') log.info "Connecting CID H2 store: '${config.store.location}'" - encoder = new CidEncoder() + encoder = new LinEncoder() dataSource = createDataSource(config.store) // create the db tables createDbTables(dataSource) @@ -56,7 +56,7 @@ class H2CidStore implements CidStore { return this } - static HikariDataSource createDataSource(DataStoreOpts store) { + static HikariDataSource createDataSource(LineageStoreOpts store) { final result = new HikariDataSource() result.jdbcUrl = store.location result.driverClassName = 'org.h2.Driver' @@ -70,25 +70,25 @@ class H2CidStore implements CidStore { // create DDL is missing try(final sql=new Sql(dataSource)) { 
sql.execute(''' - CREATE TABLE IF NOT EXISTS cid_file ( + CREATE TABLE IF NOT EXISTS lid_file ( id BIGINT AUTO_INCREMENT PRIMARY KEY, path VARCHAR UNIQUE NOT NULL, metadata CLOB NOT NULL ); - CREATE TABLE IF NOT EXISTS cid_file_tag ( + CREATE TABLE IF NOT EXISTS lid_file_tag ( file_id BIGINT NOT NULL, tags TEXT NOT NULL, PRIMARY KEY (file_id), - FOREIGN KEY (file_id) REFERENCES cid_file(id) ON DELETE CASCADE + FOREIGN KEY (file_id) REFERENCES lid_file(id) ON DELETE CASCADE ); - CREATE TABLE IF NOT EXISTS cid_history_record ( + CREATE TABLE IF NOT EXISTS lid_history_record ( id IDENTITY PRIMARY KEY, -- Auto-increment primary key timestamp TIMESTAMP NOT NULL, run_name VARCHAR(255) NOT NULL, session_id UUID NOT NULL, - run_cid VARCHAR(255) NOT NULL, + run_lid VARCHAR(255) NOT NULL, UNIQUE (run_name, session_id) -- Enforce uniqueness constraint ); ''') @@ -98,25 +98,25 @@ class H2CidStore implements CidStore { static void createAlias(HikariDataSource dataSource){ try(final sql=new Sql(dataSource)) { sql.execute(""" - CREATE ALIAS IF NOT EXISTS JSON_MATCH FOR "nextflow.data.cid.h2.H2CidStore.matchesJsonQuery" + CREATE ALIAS IF NOT EXISTS JSON_MATCH FOR "nextflow.lineage.h2.H2LinStore.matchesJsonQuery" """) } } @Override - void save(String key, CidSerializable object) { + void save(String key, LinSerializable object) { final value = encoder.encode(object) try(final sql=new Sql(dataSource)) { sql.execute(""" - INSERT INTO cid_file (path, metadata) VALUES (?, ?) + INSERT INTO lid_file (path, metadata) VALUES (?, ?) """, [key, (Object)value]) } } @Override - CidSerializable load(String key) { + LinSerializable load(String key) { try(final sql=new Sql(dataSource)) { - final result = sql.firstRow("SELECT metadata FROM cid_file WHERE path = ?", List.of(key)) + final result = sql.firstRow("SELECT metadata FROM lid_file WHERE path = ?", List.of(key)) return result ? 
encoder.decode(toValue(result.metadata).toString()) : null } } @@ -128,15 +128,15 @@ class H2CidStore implements CidStore { } @Override - CidHistoryLog getHistoryLog() { - return new H2CidHistoryLog(dataSource) + LinHistoryLog getHistoryLog() { + return new H2LinHistoryLog(dataSource) } @Override - Map search(String queryString) { - final results= new HashMap() + Map search(String queryString) { + final results= new HashMap() try(final sql=new Sql(dataSource)) { - sql.eachRow("SELECT path, metadata FROM cid_file WHERE JSON_MATCH(metadata, ?)", List.of(queryString)) { row -> + sql.eachRow("SELECT path, metadata FROM lid_file WHERE JSON_MATCH(metadata, ?)", List.of(queryString)) { row -> results.put(row['path'] as String, encoder.decode(toValue(row['metadata']) as String)) } } @@ -150,8 +150,8 @@ class H2CidStore implements CidStore { */ static boolean matchesJsonQuery(String jsonString, String queryString) { def json = new JsonSlurper().parseText(jsonString) - def conditions = CidUtils.parseQuery(queryString) - return CidUtils.checkParams(json, conditions) + def conditions = LinUtils.parseQuery(queryString) + return LinUtils.checkParams(json, conditions) } @Override diff --git a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStoreFactory.groovy b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStoreFactory.groovy similarity index 72% rename from plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStoreFactory.groovy rename to plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStoreFactory.groovy index f32eb1004d..466c4579df 100644 --- a/plugins/nf-cid-h2/src/main/nextflow/data/cid/h2/H2CidStoreFactory.groovy +++ b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStoreFactory.groovy @@ -15,27 +15,27 @@ * */ -package nextflow.data.cid.h2 +package nextflow.lineage.h2 import groovy.transform.CompileStatic import groovy.util.logging.Slf4j -import nextflow.data.cid.CidStore -import nextflow.data.cid.CidStoreFactory -import 
nextflow.data.config.DataConfig +import nextflow.lineage.LinStore +import nextflow.lineage.LinStoreFactory +import nextflow.lineage.config.LineageConfig import nextflow.plugin.Priority @Slf4j @CompileStatic @Priority(-10) // <-- lower is higher, this is needed to override default provider behavior -class H2CidStoreFactory extends CidStoreFactory { +class H2LinStoreFactory extends LinStoreFactory { @Override - boolean canOpen(DataConfig config) { + boolean canOpen(LineageConfig config) { return config.store.location.startsWith('jdbc:h2:') } @Override - protected CidStore newInstance(DataConfig config) { - return new H2CidStore().open(config) + protected LinStore newInstance(LineageConfig config) { + return new H2LinStore().open(config) } } diff --git a/plugins/nf-cid-h2/src/resources/META-INF/MANIFEST.MF b/plugins/nf-lineage-h2/src/resources/META-INF/MANIFEST.MF similarity index 59% rename from plugins/nf-cid-h2/src/resources/META-INF/MANIFEST.MF rename to plugins/nf-lineage-h2/src/resources/META-INF/MANIFEST.MF index 9eab0f2267..012e959388 100644 --- a/plugins/nf-cid-h2/src/resources/META-INF/MANIFEST.MF +++ b/plugins/nf-lineage-h2/src/resources/META-INF/MANIFEST.MF @@ -1,6 +1,6 @@ Manifest-Version: 1.0 -Plugin-Class: nextflow.data.cid.h2.H2CidPlugin -Plugin-Id: nf-cid-h2 +Plugin-Class: nextflow.lineage.h2.H2LinPlugin +Plugin-Id: nf-lineage-h2 Plugin-Version: 0.1.0 Plugin-Provider: Seqera Labs Plugin-Requires: >=25.01.0-edge diff --git a/plugins/nf-cid-h2/src/resources/META-INF/extensions.idx b/plugins/nf-lineage-h2/src/resources/META-INF/extensions.idx similarity index 89% rename from plugins/nf-cid-h2/src/resources/META-INF/extensions.idx rename to plugins/nf-lineage-h2/src/resources/META-INF/extensions.idx index b61797ca96..16c6f06f0c 100644 --- a/plugins/nf-cid-h2/src/resources/META-INF/extensions.idx +++ b/plugins/nf-lineage-h2/src/resources/META-INF/extensions.idx @@ -14,5 +14,5 @@ # limitations under the License. 
# -nextflow.data.cid.h2.H2CidPlugin -nextflow.data.cid.h2.H2CidStoreFactory +nextflow.lineage.h2.H2LinPlugin +nextflow.lineage.h2.H2LinStoreFactory diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy b/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinHistoryLogTest.groovy similarity index 82% rename from plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy rename to plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinHistoryLogTest.groovy index e8878bf2fc..9a597da176 100644 --- a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidHistoryLogTest.groovy +++ b/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinHistoryLogTest.groovy @@ -15,9 +15,9 @@ * */ -package nextflow.data.cid.h2 +package nextflow.lineage.h2 -import nextflow.data.config.DataConfig +import nextflow.lineage.config.LineageConfig import spock.lang.Shared import spock.lang.Specification @@ -25,15 +25,15 @@ import spock.lang.Specification * * @author Paolo Di Tommaso */ -class H2CidHistoryLogTest extends Specification { +class H2LinHistoryLogTest extends Specification { @Shared - H2CidStore store + H2LinStore store def setupSpec() { def uri = "jdbc:h2:mem:testdb;DB_CLOSE_DELAY=-1" - def config = new DataConfig([store:[location:uri]]) - store = new H2CidStore().open(config) + def config = new LineageConfig([store:[location:uri]]) + store = new H2LinStore().open(config) } def cleanupSpec() { @@ -44,7 +44,7 @@ class H2CidHistoryLogTest extends Specification { store.truncateAllTables() } - def 'should write cid record' () { + def 'should write lid record' () { given: def log = store.getHistoryLog() def uuid = UUID.randomUUID() @@ -58,10 +58,10 @@ class H2CidHistoryLogTest extends Specification { then: rec.runName == 'foo' rec.sessionId == uuid - rec.runCid == '1234' + rec.runLid == '1234' } - def 'should update run cid' () { + def 'should update run lid' () { given: def log = store.getHistoryLog() def uuid = UUID.randomUUID() @@ 
-71,7 +71,7 @@ class H2CidHistoryLogTest extends Specification { noExceptionThrown() when: - log.updateRunCid(uuid, '4444') + log.updateRunLid(uuid, '4444') then: noExceptionThrown() @@ -80,7 +80,7 @@ class H2CidHistoryLogTest extends Specification { then: rec.runName == 'foo' rec.sessionId == uuid - rec.runCid == '4444' + rec.runLid == '4444' } def 'should update get records' () { @@ -103,15 +103,15 @@ class H2CidHistoryLogTest extends Specification { and: all[0].runName == 'foo1' all[0].sessionId == uuid1 - all[0].runCid == '1' + all[0].runLid == '1' and: all[1].runName == 'foo2' all[1].sessionId == uuid2 - all[1].runCid == '2' + all[1].runLid == '2' and: all[2].runName == 'foo3' all[2].sessionId == uuid3 - all[2].runCid == '3' + all[2].runLid == '3' } } diff --git a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy b/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy similarity index 69% rename from plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy rename to plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy index 96ed7db377..903e92837b 100644 --- a/plugins/nf-cid-h2/src/test/nextflow/data/cid/h2/H2CidStoreTest.groovy +++ b/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy @@ -15,35 +15,34 @@ * */ -package nextflow.data.cid.h2 +package nextflow.lineage.h2 -import nextflow.data.cid.model.Annotation -import nextflow.data.cid.model.Checksum -import nextflow.data.cid.model.DataPath -import nextflow.data.cid.model.DataOutput -import nextflow.data.cid.model.Parameter -import nextflow.data.cid.model.Workflow -import nextflow.data.cid.model.WorkflowRun -import nextflow.data.config.DataConfig +import nextflow.lineage.model.Annotation +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.Workflow +import 
nextflow.lineage.model.WorkflowRun +import nextflow.lineage.config.LineageConfig import spock.lang.Shared import spock.lang.Specification -import java.time.Instant import java.time.OffsetDateTime /** * * @author Paolo Di Tommaso */ -class H2CidStoreTest extends Specification { +class H2LinStoreTest extends Specification { @Shared - H2CidStore store + H2LinStore store def setupSpec() { def uri = "jdbc:h2:mem:testdb;DB_CLOSE_DELAY=-1" - def config = new DataConfig([store:[location:uri]]) - store = new H2CidStore().open(config) + def config = new LineageConfig([store:[location:uri]]) + store = new H2LinStore().open(config) } def cleanupSpec() { @@ -52,7 +51,7 @@ class H2CidStoreTest extends Specification { def 'should store and get a value' () { given: - def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "cid://source", "cid://workflow", "cid//task", 1234) + def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid//task", 1234) when: store.save('/some/key', value) then: @@ -68,11 +67,11 @@ class H2CidStoreTest extends Specification { def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) def key2 = "testKey2" - def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [new Annotation("key1", "value1"), new Annotation("key2", "value2")]) + def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key1", "value1"), new Annotation("key2", "value2")]) def key3 = "testKey3" - def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 
1234, time, time, [new Annotation("key2", "value2"), new Annotation("key3", "value3")]) + def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key2", "value2"), new Annotation("key3", "value3")]) def key4 = "testKey4" - def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "cid://workflow", "cid//task", 1234, time, time, [new Annotation("key3", "value3"), new Annotation("key4", "value4")]) + def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key3", "value3"), new Annotation("key4", "value4")]) store.save(key, value1) store.save(key2, value2) diff --git a/settings.gradle b/settings.gradle index 00ecc5e6ee..6e89b4728f 100644 --- a/settings.gradle +++ b/settings.gradle @@ -27,7 +27,7 @@ include 'nextflow' include 'nf-commons' include 'nf-httpfs' include 'nf-lang' -include 'nf-cid' +include 'nf-lineage' include 'nf-lang' rootProject.children.each { prj -> @@ -44,4 +44,4 @@ include 'plugins:nf-codecommit' include 'plugins:nf-wave' include 'plugins:nf-cloudcache' include 'plugins:nf-k8s' -include 'plugins:nf-cid-h2' +include 'plugins:nf-lineage-h2' From abf0c3a8c062c3245b3efa40e8738cf516886da9 Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 15 Apr 2025 20:23:56 +0200 Subject: [PATCH 53/72] fixes render, hint of closer property name Signed-off-by: jorgee --- .../groovy/nextflow/cli/CmdLineage.groovy | 18 ++++---- .../groovy/nextflow/cli/CmdLineageTest.groovy | 8 ++-- .../lineage/LinPropertyValidator.groovy | 18 ++++---- .../lineage/cli/LinCommandImpl.groovy | 41 ++++++++++++------- .../lineage/fs/LinFileSystemProvider.groovy | 3 +- .../lineage/cli/LinCommandImplTest.groovy | 12 +++--- 6 files changed, 58 insertions(+), 42 deletions(-) diff --git 
a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy index 7b56c9595c..cc46bbd573 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy @@ -34,7 +34,7 @@ import org.pf4j.ExtensionPoint * @author Paolo Di Tommaso */ @CompileStatic -@Parameters(commandDescription = "Explore workflows CID metadata") +@Parameters(commandDescription = "Explore workflows lineage metadata") class CmdLineage extends CmdBase implements UsageAware { private static final String NAME = 'lineage' @@ -94,7 +94,7 @@ class CmdLineage extends CmdBase implements UsageAware { // load the command operations this.operation = Plugins.getExtension(LinCommand) if( !operation ) - throw new IllegalStateException("Unable to load CID plugin") + throw new IllegalStateException("Unable to load lineage extensions.") // consume the first argument getCmd(args).apply(args.drop(1)) } @@ -157,7 +157,7 @@ class CmdLineage extends CmdBase implements UsageAware { @Override String getDescription() { - return 'Print the Lineage execution log' + return 'Print the lineage execution log' } @Override @@ -186,7 +186,7 @@ class CmdLineage extends CmdBase implements UsageAware { @Override String getDescription() { - return 'Print the description of a Lineage ID' + return 'Print the description of a Lineage ID (lid)' } void apply(List args) { @@ -202,7 +202,7 @@ class CmdLineage extends CmdBase implements UsageAware { @Override void usage() { println description - println "Usage: nextflow $NAME $name " + println "Usage: nextflow $NAME $name " } } @@ -217,7 +217,7 @@ class CmdLineage extends CmdBase implements UsageAware { } void apply(List args) { - if (args.size() != 2) { + if (args.size() < 1 || args.size() > 2) { println("ERROR: Incorrect number of parameters") usage() return @@ -229,7 +229,7 @@ class CmdLineage extends CmdBase implements UsageAware { 
@Override void usage() { println description - println "Usage: nextflow $NAME $name " + println "Usage: nextflow $NAME $name []" } } @@ -256,7 +256,7 @@ class CmdLineage extends CmdBase implements UsageAware { @Override void usage() { println description - println "Usage: nextflow $NAME $name " + println "Usage: nextflow $NAME $name " } } @@ -268,7 +268,7 @@ class CmdLineage extends CmdBase implements UsageAware { @Override String getDescription() { - return 'Find Lineage metadata descriptions matching with a query' + return 'Find lineage metadata descriptions matching with a query' } void apply(List args) { diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy index e25d5655cf..f33e886ee4 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy @@ -215,8 +215,8 @@ class CmdLineageTest extends Specification { entry = new TaskRun("u345-2346-1stw2", "foo", new Checksum("abcde2345","nextflow","standard"), new Checksum("abfsc2375","nextflow","standard"), - [new Parameter( "ValueInParam", "sample_id","ggal_gut"), - new Parameter("FileInParam","reads",["lid://45678/output.txt"])], + [new Parameter( "val", "sample_id","ggal_gut"), + new Parameter("path","reads",["lid://45678/output.txt"])], null, null, null, null, [:],[], null) lidFile3.text = encoder.encode(entry) entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), @@ -230,10 +230,10 @@ class CmdLineageTest extends Specification { final network = """flowchart BT lid://12345/file.bam@{shape: document, label: "lid://12345/file.bam"} lid://123987/file.bam@{shape: document, label: "lid://123987/file.bam"} - lid://123987@{shape: process, label: "foo"} + lid://123987@{shape: process, label: "foo [lid://123987]"} ggal_gut@{shape: document, label: "ggal_gut"} lid://45678/output.txt@{shape: document, 
label: "lid://45678/output.txt"} - lid://45678@{shape: process, label: "bar"} + lid://45678@{shape: process, label: "bar [lid://45678]"} lid://123987/file.bam -->lid://12345/file.bam lid://123987 -->lid://123987/file.bam diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy index 86ea5cbbea..7c43845a1d 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy @@ -16,6 +16,7 @@ package nextflow.lineage +import groovy.transform.CompileStatic import nextflow.lineage.model.Annotation import nextflow.lineage.model.Checksum import nextflow.lineage.model.DataPath @@ -26,30 +27,33 @@ import nextflow.lineage.model.Workflow import nextflow.lineage.model.WorkflowOutputs import nextflow.lineage.model.WorkflowRun -import java.lang.reflect.Field - /** * Class to validate if the string refers to a property in the classes of the Lineage Metadata model. 
* @author Jorge Ejarque */ +@CompileStatic class LinPropertyValidator { - private static List LID_MODEL_CLASSES = [Workflow, WorkflowRun, WorkflowOutputs, TaskRun, TaskOutputs, DataOutput, DataPath, Parameter, Checksum, Annotation] + private static List LID_MODEL_CLASSES = [Workflow, WorkflowRun, WorkflowOutputs, TaskRun, TaskOutputs, DataOutput, DataPath, Parameter, Checksum, Annotation] as List private Set validProperties LinPropertyValidator(){ this.validProperties = new HashSet() for( Class clazz: LID_MODEL_CLASSES) { - for( Field field: clazz.declaredFields) { + for( MetaProperty field: clazz.metaClass.getProperties()) { validProperties.add( field.name) } } } void validate(Collection properties) { - for(String property: properties) { - if (!(property in this.validProperties)) { - throw new IllegalArgumentException("Property '$property' doesn't exist in the lineage model") + for( String property: properties ) { + if( !(property in this.validProperties) ) { + def msg = "Property '$property' doesn't exist in the lineage model." + final matches = this.validProperties.closest(property) + if( matches ) + msg += " -- Did you mean one of these?" 
+ matches.collect { " $it"}.join(', ') + throw new IllegalArgumentException(msg) } } } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index 87b56d3f77..c4724ff760 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -51,6 +51,7 @@ import org.eclipse.jgit.diff.RawTextComparator */ @CompileStatic class LinCommandImpl implements CmdLineage.LinCommand { + private static Path DEFAULT_HTML_FILE = Path.of("lineage-render.html") @Canonical static class Edge { @@ -117,8 +118,9 @@ class LinCommandImpl implements CmdLineage.LinCommand { return } try { - renderLineage(store, args[0], Path.of(args[1])) - println("Linage graph for ${args[0]} rendered in ${args[1]}") + final renderFile = args.size() > 1 ? Path.of(args[1]) : DEFAULT_HTML_FILE + renderLineage(store, args[0], renderFile) + println("Linage graph for ${args[0]} rendered in $renderFile") } catch (Throwable e) { println("ERROR: rendering lineage graph. 
${e.message}") } @@ -142,6 +144,10 @@ class LinCommandImpl implements CmdLineage.LinCommand { file.text = template.replace('REPLACE_WITH_NETWORK_DATA', lines.join('\n')) } + private String safeId( String rawId){ + return rawId.replaceAll(/[^a-zA-Z0-9_.:\/\-]/, '_') + } + private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, LinStore store) { if (!isLidUri(nodeToRender)) throw new Exception("Identifier is not a LID URL") @@ -166,26 +172,28 @@ class LinCommandImpl implements CmdLineage.LinCommand { } private void processTaskRun(TaskRun taskRun, List lines, String nodeToRender, LinkedList nodes, LinkedList edges) { - lines << " ${nodeToRender}@{shape: process, label: \"${taskRun.name}\"}".toString() + lines << " ${nodeToRender}@{shape: process, label: \"${taskRun.name} [$nodeToRender]\"}".toString() final parameters = taskRun.inputs for (Parameter source : parameters) { - if (source.type.equals(FileInParam.simpleName)) { + if (source.type.equals("path")) { manageFileInParam(lines, nodeToRender, nodes, edges, source.value) } else { final label = convertToLabel(source.value.toString()) - lines << " ${source.value.toString()}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(source.value.toString(), nodeToRender)) + final id = safeId(source.value.toString()) + lines << " ${id}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(id, nodeToRender)) } } } private void processWorkflowRun(WorkflowRun wfRun, List lines, String nodeToRender, LinkedList edges) { - lines << " ${nodeToRender}@{shape: processes, label: \"${wfRun.name}\"}".toString() + lines << """ ${nodeToRender}@{shape: processes, label: \"${wfRun.name} [${nodeToRender}]\"}""".toString() final parameters = wfRun.params parameters.each { final label = convertToLabel(it.value.toString()) - lines << " ${it.value.toString()}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(it.value.toString(), nodeToRender)) + 
final id = safeId(it.value.toString()) + lines << " ${id}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(id, nodeToRender)) } } @@ -199,8 +207,9 @@ class LinCommandImpl implements CmdLineage.LinCommand { edges.add(new Edge(source, nodeToRender)) } else { final label = convertToLabel(source) - lines << " ${source}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(source, nodeToRender)) + final id = safeId(source) + lines << " ${id}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(id, nodeToRender)) } } @@ -230,15 +239,17 @@ class LinCommandImpl implements CmdLineage.LinCommand { return } else { final label = convertToLabel(path) - lines << " ${path}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(path, nodeToRender)) + final id = safeId(path) + lines << " ${id}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(id, nodeToRender)) return } } } final label = convertToLabel(value.toString()) - lines << " ${value.toString()}@{shape: document, label: \"${label}\"}".toString(); - edges.add(new Edge(value.toString(), nodeToRender)) + final id = safeId(value.toString()) + lines << " ${id}@{shape: document, label: \"${label}\"}".toString(); + edges.add(new Edge(id, nodeToRender)) } @Override diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy index 5328d97912..555a227d73 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy @@ -133,7 +133,7 @@ class LinFileSystemProvider extends FileSystemProvider { return newByteChannel0(lid, options, attrs) } - + @CompileStatic private class LinPathSeekableByteChannel implements SeekableByteChannel { SeekableByteChannel channel @@ -230,6 +230,7 @@ class LinFileSystemProvider extends 
FileSystemProvider { } } + @CompileStatic private class LidFilter implements DirectoryStream.Filter { private final LinFileSystem fs diff --git a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy index d36027f742..92c8ad79fc 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy @@ -182,9 +182,9 @@ class LinCommandImplTest extends Specification{ entry = new TaskRun("u345-2346-1stw2", "foo", new Checksum("abcde2345","nextflow","standard"), new Checksum("abfsc2375","nextflow","standard"), - [new Parameter( "ValueInParam", "sample_id","ggal_gut"), - new Parameter("FileInParam","reads",["lid://45678/output.txt"]), - new Parameter("FileInParam","input",[new DataPath("path/to/file",new Checksum("45372qe","nextflow","standard"))]) + [new Parameter( "val", "sample_id","ggal_gut"), + new Parameter("path","reads", ["lid://45678/output.txt"] ), + new Parameter("path","input", [new DataPath("path/to/file",new Checksum("45372qe","nextflow","standard"))]) ], null, null, null, null, [:],[], null) lidFile3.text = encoder.encode(entry) @@ -199,11 +199,11 @@ class LinCommandImplTest extends Specification{ final network = """flowchart BT lid://12345/file.bam@{shape: document, label: "lid://12345/file.bam"} lid://123987/file.bam@{shape: document, label: "lid://123987/file.bam"} - lid://123987@{shape: process, label: "foo"} + lid://123987@{shape: process, label: "foo [lid://123987]"} ggal_gut@{shape: document, label: "ggal_gut"} path/to/file@{shape: document, label: "path/to/file"} lid://45678/output.txt@{shape: document, label: "lid://45678/output.txt"} - lid://45678@{shape: process, label: "bar"} + lid://45678@{shape: process, label: "bar [lid://45678]"} lid://123987/file.bam -->lid://12345/file.bam lid://123987 -->lid://123987/file.bam @@ -252,7 +252,7 @@ class 
LinCommandImplTest extends Specification{ lidFile3.text = encoder.encode(entry) final network = """flowchart BT lid://12345/file.bam@{shape: document, label: "lid://12345/file.bam"} - lid://12345@{shape: processes, label: "run_name"} + lid://12345@{shape: processes, label: "run_name [lid://12345]"} ggal_gut@{shape: document, label: "ggal_gut"} 2.0@{shape: document, label: "2.0"} From ac7f6753ed086522d76f57cb82069b732d7fb372 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Tue, 15 Apr 2025 20:23:18 +0200 Subject: [PATCH 54/72] Just blanks [ci fast] Signed-off-by: Paolo Di Tommaso --- .../src/main/nextflow/lineage/LinPropertyValidator.groovy | 3 --- modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy | 2 -- 2 files changed, 5 deletions(-) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy index 7c43845a1d..35aa2ef1de 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy @@ -64,7 +64,4 @@ class LinPropertyValidator { } } - - - } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy index ffef02b86b..3f826b7a0a 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy @@ -60,6 +60,4 @@ interface LinStore extends Closeable { */ Map search(String queryString) - - } From 4c9f8e0da6a2e971f2a0edee9809efe08e65edc4 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 16 Apr 2025 10:43:19 +0200 Subject: [PATCH 55/72] Minor changes Signed-off-by: Paolo Di Tommaso --- .../main/nextflow/lineage/LinObserver.groovy | 12 +++---- .../src/main/nextflow/lineage/LinUtils.groovy | 31 +++++++++---------- .../lineage/cli/LinCommandImpl.groovy | 26 +++++++++------- 
.../main/nextflow/lineage/fs/LinPath.groovy | 18 +++++------ .../test/nextflow/lineage/LinUtilsTest.groovy | 4 ++- .../lineage/cli/LinCommandImplTest.groovy | 7 ++--- .../nextflow/lineage/fs/LinPathTest.groovy | 6 ++-- 7 files changed, 53 insertions(+), 51 deletions(-) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy index 39ceda8383..f61559f23a 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy @@ -87,7 +87,7 @@ class LinObserver implements TraceObserver { private String executionHash private LinStore store private Session session - private WorkflowOutputs workflowResults + private WorkflowOutputs workflowOutputs private Map outputsStoreDirLid = new HashMap(10) private PathNormalizer normalizer @@ -115,7 +115,7 @@ class LinObserver implements TraceObserver { normalizer = new PathNormalizer(session.workflowMetadata) executionHash = storeWorkflowRun(normalizer) final executionUri = asUriString(executionHash) - workflowResults = new WorkflowOutputs( + workflowOutputs = new WorkflowOutputs( OffsetDateTime.now(), executionUri, new LinkedList() @@ -125,10 +125,10 @@ class LinObserver implements TraceObserver { @Override void onFlowComplete(){ - if (this.workflowResults){ - workflowResults.createdAt = OffsetDateTime.now() + if (this.workflowOutputs){ + workflowOutputs.createdAt = OffsetDateTime.now() final key = executionHash + '#outputs' - this.store.save(key, workflowResults) + this.store.save(key, workflowOutputs) } } @@ -392,7 +392,7 @@ class LinObserver implements TraceObserver { @Override void onWorkflowPublish(String name, Object value){ - workflowResults.outputs.add(new Parameter(getParameterType(value), name, convertPathsToLidReferences(value))) + workflowOutputs.outputs.add(new Parameter(getParameterType(value), name, convertPathsToLidReferences(value))) } protected static 
String getParameterType(Object param) { diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy index 35d32f5616..79df01aa6d 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy @@ -16,19 +16,18 @@ package nextflow.lineage +import java.nio.file.attribute.FileTime +import java.time.OffsetDateTime +import java.time.ZoneId + import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.lineage.fs.LinPath -import nextflow.lineage.model.WorkflowRun import nextflow.lineage.model.TaskRun +import nextflow.lineage.model.WorkflowRun import nextflow.lineage.serde.LinEncoder import nextflow.lineage.serde.LinSerializable import nextflow.serde.gson.GsonEncoder - -import java.nio.file.attribute.FileTime -import java.time.OffsetDateTime -import java.time.ZoneOffset - /** * Utils class for Lineage IDs. * @@ -59,7 +58,6 @@ class LinUtils { final children = parseChildrenFormFragment(uri.fragment) return searchPath(store, key, parameters, children ) } - } private static Collection globalSearch(LinStore store, URI uri) { @@ -106,7 +104,7 @@ class LinUtils { protected static List searchPath(LinStore store, String key, Map params, String[] children = []) { final object = store.load(key) if (!object) { - throw new FileNotFoundException("Lineage object $key not found.") + throw new FileNotFoundException("Lineage object $key not found") } final results = new LinkedList() if (children && children.size() > 0) { @@ -121,7 +119,7 @@ class LinUtils { private static void treatSubObject(LinStore store, String key, LinSerializable object, String[] children, Map params, LinkedList results) { final output = getSubObject(store, key, object, children) if (!output) { - throw new FileNotFoundException("Lineage object $key#${children.join('.')} not found.") + throw new FileNotFoundException("Lineage object 
$key#${children.join('.')} not found") } treatObject(output, params, results) } @@ -192,7 +190,8 @@ class LinUtils { } /** - * Check if an object fullfill the parameter-value + * Check if an object fulfill the parameter-value + * * @param object Object to evaluate * @param params parameter-value pairs to evaluate * @return true if all object parameters exist and matches with the value, otherwise false. @@ -274,16 +273,16 @@ class LinUtils { } /** - * Helper function to convert from FileTime to ISO 8601 with offser. + * Helper function to convert from FileTime to ISO 8601 with offset + * of current timezone. * * @param time File time to convert - * @return or null in case of not available (null) + * @return The {@link OffsetDateTime} for the corresponding file time or null in case of not available (null) */ static OffsetDateTime toDate(FileTime time){ - if (time) - return time.toInstant().atOffset(ZoneOffset.UTC) - else - return null + return time!=null + ? time.toInstant().atZone(ZoneId.systemDefault()).toOffsetDateTime() + : null } /** diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index c4724ff760..2df2f3766f 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -60,6 +60,8 @@ class LinCommandImpl implements CmdLineage.LinCommand { String label } + static final private String ERR_NOT_LOADED = 'Error lineage store not loaded - Check Nextflow configuration' + @Override void log(ConfigMap config) { final session = new Session(config) @@ -67,21 +69,21 @@ class LinCommandImpl implements CmdLineage.LinCommand { if (store) { printHistory(store) } else { - println "Error lineage store not loaded. Check Nextflow configuration." 
+ println ERR_NOT_LOADED } } private void printHistory(LinStore store) { final records = store.historyLog?.records if( !records ) { - println("No workflow runs LIDs found.") + println("No workflow runs found in lineage history log") return } def table = new TableBuilder(cellSeparator: '\t') .head('TIMESTAMP') .head('RUN NAME') .head('SESSION ID') - .head('RUN LID') + .head('LINEAGE ID') for (LinHistoryRecord record : records) { table.append(record.toList()) } @@ -91,10 +93,10 @@ class LinCommandImpl implements CmdLineage.LinCommand { @Override void describe(ConfigMap config, List args) { if( !isLidUri(args[0]) ) - throw new Exception("Identifier is not a LID URL") + throw new Exception("Identifier is not a lineage URL") final store = LinStoreFactory.getOrCreate(new Session(config)) if ( !store ) { - println "Error lineage store not loaded. Check Nextflow configuration." + println ERR_NOT_LOADED return } try { @@ -106,7 +108,7 @@ class LinCommandImpl implements CmdLineage.LinCommand { entries = entries.size() == 1 ? entries[0] : entries println LinUtils.encodeSearchOutputs(entries, true) } catch (Throwable e) { - println "Error loading ${args[0]}. ${e.message}" + println "Error loading ${args[0]} - ${e.message}" } } @@ -114,7 +116,7 @@ class LinCommandImpl implements CmdLineage.LinCommand { void render(ConfigMap config, List args) { final store = LinStoreFactory.getOrCreate(new Session(config)) if( !store ) { - println "Error lineage store not loaded. Check Nextflow configuration." + println ERR_NOT_LOADED return } try { @@ -122,7 +124,7 @@ class LinCommandImpl implements CmdLineage.LinCommand { renderLineage(store, args[0], renderFile) println("Linage graph for ${args[0]} rendered in $renderFile") } catch (Throwable e) { - println("ERROR: rendering lineage graph. 
${e.message}") + println("ERROR: rendering lineage graph - ${e.message}") } } @@ -150,7 +152,7 @@ class LinCommandImpl implements CmdLineage.LinCommand { private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, LinStore store) { if (!isLidUri(nodeToRender)) - throw new Exception("Identifier is not a LID URL") + throw new Exception("Identifier is not a lineage URL") final key = nodeToRender.substring(LID_PROT.size()) final lidObject = store.load(key) switch (lidObject.getClass()) { @@ -255,11 +257,11 @@ class LinCommandImpl implements CmdLineage.LinCommand { @Override void diff(ConfigMap config, List args) { if (!isLidUri(args[0]) || !isLidUri(args[1])) - throw new Exception("Identifier is not a LID URL") + throw new Exception("Identifier is not a lineage URL") final store = LinStoreFactory.getOrCreate(new Session(config)) if (!store) { - println "Error lineage store not loaded. Check Nextflow configuration." + println ERR_NOT_LOADED return } try { @@ -315,7 +317,7 @@ class LinCommandImpl implements CmdLineage.LinCommand { void find(ConfigMap config, List args) { final store = LinStoreFactory.getOrCreate(new Session(config)) if (!store) { - println "Error lineage store not loaded. Check Nextflow configuration." 
+ println ERR_NOT_LOADED return } try { diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index e0d7019455..0a88f0a995 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -114,7 +114,7 @@ class LinPath implements Path, LogicalDataPath { protected static void validateDataOutput(DataOutput lidObject) { final hashedPath = FileHelper.toCanonicalPath(lidObject.path as String) if( !hashedPath.exists() ) - throw new FileNotFoundException("Target path $lidObject.path does not exists.") + throw new FileNotFoundException("Target path $lidObject.path does not exist") validateChecksum(lidObject.checksum, hashedPath) } @@ -160,7 +160,7 @@ class LinPath implements Path, LogicalDataPath { throw new IllegalArgumentException("Cannot get target path for an empty LinPath") final store = fs.getStore() if( !store ) - throw new Exception("Lineage store not found. 
Check Nextflow configuration.") + throw new Exception("Lineage store not found - Check Nextflow configuration") final object = store.load(filePath) if ( object ){ if( object instanceof DataOutput ) { @@ -181,12 +181,12 @@ class LinPath implements Path, LogicalDataPath { return findTarget(fs, parent.toString(), false, newChildren as String[]) } } - throw new FileNotFoundException("Target path '$filePath' does not exists.") + throw new FileNotFoundException("Target path '$filePath' does not exist") } protected static Path getMetadataAsTargetPath(LinSerializable results, LinFileSystem fs, String filePath, String[] children){ if( !results ) { - throw new FileNotFoundException("Target path '$filePath' does not exist.") + throw new FileNotFoundException("Target path '$filePath' does not exist") } if (children && children.size() > 0) { return getSubObjectAsPath(fs, filePath, results, children) @@ -203,14 +203,14 @@ class LinPath implements Path, LogicalDataPath { * @param key Parent metadata key. * @param object Parent object. * @param children Array of string in indicating the properties to navigate to get the sub-object. - * @return LinMetadataPath or null in it does not exist. + * @return LinMetadataPath or null in it does not exist */ static LinMetadataPath getSubObjectAsPath(LinFileSystem fs, String key, LinSerializable object, String[] children) { if( isSearchingOutputs(object, children) ) { // When asking for a Workflow or task output retrieve the outputs description final outputs = fs.store.load("${key}/outputs") if( !outputs ) { - throw new FileNotFoundException("Target path '$key#outputs' does not exist.") + throw new FileNotFoundException("Target path '$key#outputs' does not exist") } return generateLinMetadataPath(fs, key, outputs, children) } @@ -223,7 +223,7 @@ class LinPath implements Path, LogicalDataPath { def creationTime = toFileTime(navigate(object, 'createdAt') as OffsetDateTime ?: OffsetDateTime.now()) final output = children ? 
navigate(object, children.join('.')) : object if( !output ) { - throw new FileNotFoundException("Target path '$key#${children.join('.')}' does not exist.") + throw new FileNotFoundException("Target path '$key#${children.join('.')}' does not exist") } return new LinMetadataPath(encodeSearchOutputs(output, true), creationTime, fs, key, children) } @@ -236,7 +236,7 @@ class LinPath implements Path, LogicalDataPath { if (children && children.size() > 0) realPath = realPath.resolve(children.join(SEPARATOR)) if (!realPath.exists()) - throw new FileNotFoundException("Target path '$realPath' does not exist.") + throw new FileNotFoundException("Target path '$realPath' does not exist") return realPath } @@ -455,7 +455,7 @@ class LinPath implements Path, LogicalDataPath { /** * Get the path associated to any metadata object. * @return Path associated to a DataOutput or LinMetadataFile with the metadata object for other types. - * @throws FileNotFoundException if the metadata associated to the LinPath does not exist. 
+ * @throws FileNotFoundException if the metadata associated to the LinPath does not exist */ protected Path getTargetOrMetadataPath(){ return findTarget(fileSystem, filePath, true, parseChildrenFormFragment(fragment)) diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy index 895db4d1ea..363556632e 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy @@ -16,6 +16,8 @@ package nextflow.lineage +import java.time.ZoneId + import nextflow.lineage.model.Checksum import nextflow.lineage.model.DataPath import nextflow.lineage.model.Parameter @@ -52,7 +54,7 @@ class LinUtilsTest extends Specification{ where: FILE_TIME | DATE null | null - FileTime.fromMillis(1234) | OffsetDateTime.ofInstant(Instant.ofEpochMilli(1234), ZoneOffset.UTC) + FileTime.fromMillis(1234) | Instant.ofEpochMilli(1234).atZone(ZoneId.systemDefault())?.toOffsetDateTime() } def 'should convert to FileTime'(){ diff --git a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy index 92c8ad79fc..39958481bd 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy @@ -110,8 +110,7 @@ class LinCommandImplTest extends Specification{ then: stdout.size() == 1 - stdout[0] == "No workflow runs LIDs found." - + stdout[0] == "No workflow runs found in lineage history log" } def 'should show lid content' (){ @@ -153,7 +152,7 @@ class LinCommandImplTest extends Specification{ then: stdout.size() == 1 - stdout[0] == "Error loading lid://12345. Lineage object 12345 not found." 
+ stdout[0] == "Error loading lid://12345 - Lineage object 12345 not found" } def 'should get lineage lid content' (){ @@ -425,7 +424,7 @@ class LinCommandImplTest extends Specification{ .findResults { line -> !line.contains('DEBUG') ? line : null } .findResults { line -> !line.contains('INFO') ? line : null } .findResults { line -> !line.contains('plugin') ? line : null } - def expectedOutput = "Error lineage store not loaded. Check Nextflow configuration." + def expectedOutput = "Error lineage store not loaded - Check Nextflow configuration" then: stdout.size() == 4 stdout[0] == expectedOutput diff --git a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy index 7bab762441..f3a33b51c9 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy @@ -229,19 +229,19 @@ class LinPathTest extends Specification { LinPath.getMetadataAsTargetPath(wf, lidFs, "123456", ["no-exist"] as String[]) then: def exception = thrown(FileNotFoundException) - exception.message == "Target path '123456#no-exist' does not exist." + exception.message == "Target path '123456#no-exist' does not exist" when: 'outputs does not exists' LinPath.getMetadataAsTargetPath(wf, lidFs, "6789", ["outputs"] as String[]) then: def exception1 = thrown(FileNotFoundException) - exception1.message == "Target path '6789#outputs' does not exist." + exception1.message == "Target path '6789#outputs' does not exist" when: 'null object' LinPath.getMetadataAsTargetPath(null, lidFs, "123456", ["no-exist"] as String[]) then: def exception2 = thrown(FileNotFoundException) - exception2.message == "Target path '123456' does not exist." 
+ exception2.message == "Target path '123456' does not exist" cleanup: meta.resolve("123456").deleteDir() From 306fbaf0867a2cffd245d62a172c4533f80deaea Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 16 Apr 2025 14:47:57 +0200 Subject: [PATCH 56/72] Fix failing tests [ci fast] Signed-off-by: Paolo Di Tommaso --- .../src/test/groovy/nextflow/cli/CmdLineageTest.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy index f33e886ee4..3722f69eae 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy @@ -117,7 +117,7 @@ class CmdLineageTest extends Specification { then: stdout.size() == 1 - stdout[0] == "No workflow runs LIDs found." + stdout[0] == "No workflow runs found in lineage history log" cleanup: folder?.deleteDir() @@ -179,7 +179,7 @@ class CmdLineageTest extends Specification { then: stdout.size() == 1 - stdout[0] == "Error loading lid://12345. Lineage object 12345 not found." 
+ stdout[0] == "Error loading lid://12345 - Lineage object 12345 not found" cleanup: folder?.deleteDir() From 226bf65c7b8f8f3b8c5fab7a160edd9cce2c4a4d Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 16 Apr 2025 16:22:18 +0200 Subject: [PATCH 57/72] Add support for command aliases [ci fast] Signed-off-by: Paolo Di Tommaso --- .../src/main/groovy/nextflow/cli/CmdLineage.groovy | 2 +- .../src/main/groovy/nextflow/cli/Launcher.groovy | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy index cc46bbd573..7f53e0f49b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy @@ -34,7 +34,7 @@ import org.pf4j.ExtensionPoint * @author Paolo Di Tommaso */ @CompileStatic -@Parameters(commandDescription = "Explore workflows lineage metadata") +@Parameters(commandDescription = "Explore workflows lineage metadata", commandNames = ['li']) class CmdLineage extends CmdBase implements UsageAware { private static final String NAME = 'lineage' diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy index 5347f638a6..a17ad69325 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy @@ -121,13 +121,20 @@ class Launcher { options = new CliOptions() jcommander = new JCommander(options) - allCommands.each { cmd -> + for( CmdBase cmd : allCommands ) { cmd.launcher = this; - jcommander.addCommand(cmd.name, cmd) + jcommander.addCommand(cmd.name, cmd, aliases(cmd)) } jcommander.setProgramName( APP_NAME ) } + private static final String[] EMPTY = new String[0] + + private static String[] aliases(CmdBase cmd) { + final aliases = 
cmd.getClass().getAnnotation(Parameters)?.commandNames() + return aliases ?: EMPTY + } + /** * Create the Jcommander 'interpreter' and parse the command line arguments */ From 38735b0b4536244d27016c1d4414a2ce6024d284 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 16 Apr 2025 22:07:29 -0500 Subject: [PATCH 58/72] cleanup code style Signed-off-by: Ben Sherman --- build.gradle | 4 +- .../nextflow/processor/TaskProcessor.groovy | 4 +- .../groovy/nextflow/processor/TaskRun.groovy | 2 +- .../groovy/nextflow/script/OutputDsl.groovy | 16 ++--- .../nextflow/trace/TraceObserver.groovy | 7 ++- .../main/resources/META-INF/plugins-info.txt | 2 +- .../src/main/nextflow/file/FileHelper.groovy | 2 +- .../src/main/nextflow/serde/Encoder.groovy | 4 +- .../serde/gson/GStringSerializer.groovy | 2 +- .../nextflow/serde/gson/InstantAdapter.groovy | 9 ++- .../serde/gson/OffsetDateTimeAdapter.groovy | 7 +-- .../src/main/nextflow/util/TypeHelper.groovy | 2 +- .../nextflow/lineage/DefaultLinStore.groovy | 10 ++-- .../lineage/DefaultLinStoreFactory.groovy | 4 +- .../nextflow/lineage/LinHistoryRecord.groovy | 6 +- .../main/nextflow/lineage/LinObserver.groovy | 24 ++++---- .../lineage/LinPropertyValidator.groovy | 29 ++++++--- .../src/main/nextflow/lineage/LinUtils.groovy | 60 +++++++++++-------- .../lineage/cli/LinCommandImpl.groovy | 4 +- .../nextflow/lineage/fs/LinFileSystem.groovy | 5 +- .../main/nextflow/lineage/fs/LinPath.groovy | 7 +-- .../nextflow/lineage/h2/H2LinStore.groovy | 5 +- settings.gradle | 1 - 23 files changed, 118 insertions(+), 98 deletions(-) diff --git a/build.gradle b/build.gradle index 111d33c937..c2d2570bba 100644 --- a/build.gradle +++ b/build.gradle @@ -237,7 +237,7 @@ task compile { def getRuntimeConfigs() { def names = subprojects - .findAll { prj -> prj.name in ['nextflow','nf-commons','nf-httpfs','nf-lang', 'nf-lineage'] } + .findAll { prj -> prj.name in ['nextflow','nf-commons','nf-httpfs','nf-lang','nf-lineage'] } .collect { it.name } FileCollection 
result = null @@ -263,7 +263,7 @@ task exportClasspath { def home = System.getProperty('user.home') def all = getRuntimeConfigs() def libs = all.collect { File file -> /*println file.canonicalPath.replace(home, '$HOME');*/ file.canonicalPath; } - ['nextflow','nf-commons','nf-httpfs','nf-lang', 'nf-lineage'].each {libs << file("modules/$it/build/libs/${it}-${version}.jar").canonicalPath } + ['nextflow','nf-commons','nf-httpfs','nf-lang','nf-lineage'].each {libs << file("modules/$it/build/libs/${it}-${version}.jar").canonicalPath } file('.launch.classpath').text = libs.unique().join(':') } } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 2d75d53b32..8fb26579b3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -2280,7 +2280,7 @@ class TaskProcessor { * @return The list of paths of scripts in the project bin folder referenced in the task command */ @Memoized - public List getTaskBinEntries(String script) { + List getTaskBinEntries(String script) { List result = [] def tokenizer = new StringTokenizer(script," \t\n\r\f()[]{};&|<>`") while( tokenizer.hasMoreTokens() ) { @@ -2313,7 +2313,7 @@ class TaskProcessor { log.info(buffer.toString()) } - public Map getTaskGlobalVars(TaskRun task) { + Map getTaskGlobalVars(TaskRun task) { final result = task.getGlobalVars(ownerScript.binding) final directives = getTaskExtensionDirectiveVars(task) result.putAll(directives) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy index 4633a808a0..20ab76ec36 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy @@ -991,7 +991,7 @@ class TaskRun implements 
Cloneable { return processor.session.getCondaConfig() } - String getStubSource(){ + String getStubSource() { return config?.getStubBlock()?.source } } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy b/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy index 6950b5e933..1c7898b12c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy @@ -108,6 +108,14 @@ class OutputDsl { private Map opts = [:] + void annotations(Map value) { + setOption('annotations', value) + } + + void annotations(Closure value) { + setOption('annotations', value) + } + void contentType(String value) { setOption('contentType', value) } @@ -162,14 +170,6 @@ class OutputDsl { setOption('tags', value) } - void annotations(Map value) { - setOption('annotations', value) - } - - void annotations(Closure value) { - setOption('annotations', value) - } - private void setOption(String name, Object value) { if( opts.containsKey(name) ) throw new ScriptRuntimeException("Publish option `${name}` cannot be defined more than once for a workflow output") diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy index ee937a1f53..b4b08f3fee 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy @@ -156,13 +156,16 @@ interface TraceObserver { } /** - * Method that is invoked when a output file is annotated + * Method that is invoke when an output file is published. + * * @param destination * The destination path at `publishDir` folder. + * @param source + * The source path at `workDir` folder. 
* @param annotations * The annotations attached to this file */ - default void onFilePublish(Path destination, Path source, Map annotations){ + default void onFilePublish(Path destination, Path source, Map annotations) { onFilePublish(destination, source) } diff --git a/modules/nextflow/src/main/resources/META-INF/plugins-info.txt b/modules/nextflow/src/main/resources/META-INF/plugins-info.txt index 4a9fbbbb6e..89155b3dcd 100644 --- a/modules/nextflow/src/main/resources/META-INF/plugins-info.txt +++ b/modules/nextflow/src/main/resources/META-INF/plugins-info.txt @@ -6,4 +6,4 @@ nf-console@1.2.1 nf-google@1.19.0 nf-k8s@1.0.0 nf-tower@1.11.2 -nf-wave@1.11.1 +nf-wave@1.11.1 \ No newline at end of file diff --git a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy index be885ed40e..c72cdc2f87 100644 --- a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy +++ b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy @@ -238,7 +238,7 @@ class FileHelper { return !(path.getFileSystem().provider().scheme in UNSUPPORTED_GLOB_WILDCARDS) } - static Path toPath(value){ + static Path toPath(value) { if( value==null ) return null diff --git a/modules/nf-commons/src/main/nextflow/serde/Encoder.groovy b/modules/nf-commons/src/main/nextflow/serde/Encoder.groovy index f8a0fb48e0..cdbc8fc5cb 100644 --- a/modules/nf-commons/src/main/nextflow/serde/Encoder.groovy +++ b/modules/nf-commons/src/main/nextflow/serde/Encoder.groovy @@ -14,7 +14,7 @@ * limitations under the License. */ -package nextflow.serde; +package nextflow.serde /** * An interface for encoding and decoding objects between two types. 
@@ -40,6 +40,6 @@ interface Encoder { * @param encoded the encoded representation to decode * @return the decoded object */ - T decode(S encoded); + T decode(S encoded) } diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy index 3f088c87e0..8339093f1a 100644 --- a/modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy +++ b/modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy @@ -34,6 +34,6 @@ class GStringSerializer implements JsonSerializer { @Override JsonElement serialize(GString src, Type typeOfSrc, JsonSerializationContext context) { // Convert GString to plain String - return new JsonPrimitive(src.toString()); + return new JsonPrimitive(src.toString()) } } diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy index cfed8e5c69..80b64676a2 100644 --- a/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy +++ b/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy @@ -16,12 +16,11 @@ package nextflow.serde.gson -import com.google.gson.stream.JsonToken - import java.time.Instant import com.google.gson.TypeAdapter import com.google.gson.stream.JsonReader +import com.google.gson.stream.JsonToken import com.google.gson.stream.JsonWriter import groovy.transform.CompileStatic @@ -39,9 +38,9 @@ class InstantAdapter extends TypeAdapter { @Override Instant read(JsonReader reader) throws IOException { - if (reader.peek() == JsonToken.NULL) { - reader.nextNull(); - return null; + if( reader.peek() == JsonToken.NULL ) { + reader.nextNull() + return null } return Instant.parse(reader.nextString()) } diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy index 2bf19f0462..572f03d44d 
100644 --- a/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy +++ b/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy @@ -16,13 +16,12 @@ package nextflow.serde.gson -import com.google.gson.stream.JsonToken - import java.time.Instant import java.time.OffsetDateTime import com.google.gson.TypeAdapter import com.google.gson.stream.JsonReader +import com.google.gson.stream.JsonToken import com.google.gson.stream.JsonWriter import groovy.transform.CompileStatic @@ -41,8 +40,8 @@ class OffsetDateTimeAdapter extends TypeAdapter { @Override OffsetDateTime read(JsonReader reader) throws IOException { if (reader.peek() == JsonToken.NULL) { - reader.nextNull(); - return null; + reader.nextNull() + return null } return OffsetDateTime.parse(reader.nextString()) } diff --git a/modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy b/modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy index f9a42c1896..f7ab1604ec 100644 --- a/modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy +++ b/modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy @@ -51,7 +51,7 @@ class TypeHelper { * */ static Type getGenericType(Object object, int index) { - final params = (ParameterizedType) (object.getClass().getGenericSuperclass()); + final params = (ParameterizedType) (object.getClass().getGenericSuperclass()) return params.getActualTypeArguments()[index] } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy index 28251a1012..fb64bbe2c7 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy @@ -16,15 +16,14 @@ package nextflow.lineage -import groovy.transform.CompileStatic -import groovy.util.logging.Slf4j - import java.nio.file.FileVisitResult import java.nio.file.FileVisitor import java.nio.file.Files import 
java.nio.file.Path import java.nio.file.attribute.BasicFileAttributes +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j import nextflow.lineage.serde.LinEncoder import nextflow.lineage.serde.LinSerializable import nextflow.lineage.config.LineageConfig @@ -41,7 +40,7 @@ import nextflow.util.TestOnly @CompileStatic class DefaultLinStore implements LinStore { - private static String HISTORY_FILE_NAME =".history" + private static String HISTORY_FILE_NAME = ".history" private static final String METADATA_FILE = '.data.json' private static final String METADATA_PATH = '.meta' private static final String DEFAULT_LOCATION = 'lineage' @@ -51,7 +50,6 @@ class DefaultLinStore implements LinStore { private LinHistoryLog historyLog private LinEncoder encoder - DefaultLinStore open(LineageConfig config) { location = toLocationPath(config.store.location) metaLocation = location.resolve(METADATA_PATH) @@ -113,7 +111,7 @@ class DefaultLinStore implements LinStore { return searchAllFiles(params) } - private Map searchAllFiles (Map params) { + private Map searchAllFiles(Map params) { final results = new HashMap() Files.walkFileTree(metaLocation, new FileVisitor() { diff --git a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy index 846c0ebbbd..881442bdb8 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy @@ -30,8 +30,8 @@ import nextflow.plugin.Priority @Priority(0) class DefaultLinStoreFactory extends LinStoreFactory { - private static Pattern SCHEME = ~/^([a-zA-Z][a-zA-Z\d+\-.]*):/ - private static List SUPPORTED_SCHEMES = ['file', 's3', 'gs', 'az'] + private static final Pattern SCHEME = ~/^([a-zA-Z][a-zA-Z\d+\-.]*):/ + private static final List SUPPORTED_SCHEMES = List.of('file', 's3', 'gs', 'az') @Override boolean canOpen(LineageConfig 
config) { diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy index 31dc2d9478..fe75519c13 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy @@ -29,7 +29,9 @@ import java.text.SimpleDateFormat @CompileStatic @EqualsAndHashCode(includes = 'runName,sessionId') class LinHistoryRecord { - public static final DateFormat TIMESTAMP_FMT = new SimpleDateFormat('yyyy-MM-dd HH:mm:ss') + + static final DateFormat TIMESTAMP_FMT = new SimpleDateFormat('yyyy-MM-dd HH:mm:ss') + final Date timestamp final String runName final UUID sessionId @@ -58,7 +60,7 @@ class LinHistoryRecord { } static LinHistoryRecord parse(String line) { - def cols = line.tokenize('\t') + final cols = line.tokenize('\t') if (cols.size() == 4) { return new LinHistoryRecord(TIMESTAMP_FMT.parse(cols[0]), cols[1], UUID.fromString(cols[2]), cols[3]) } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy index f61559f23a..7b4402e570 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy @@ -16,15 +16,12 @@ package nextflow.lineage -import nextflow.util.SecretHelper - -import java.time.OffsetDateTime - import static nextflow.lineage.fs.LinPath.* import java.nio.file.Files import java.nio.file.Path import java.nio.file.attribute.BasicFileAttributes +import java.time.OffsetDateTime import groovy.transform.CompileStatic import groovy.util.logging.Slf4j @@ -43,7 +40,6 @@ import nextflow.file.FileHolder import nextflow.processor.TaskHandler import nextflow.processor.TaskRun import nextflow.script.ScriptMeta - import nextflow.script.params.BaseParam import nextflow.script.params.CmdEvalParam import nextflow.script.params.DefaultInParam 
@@ -62,6 +58,7 @@ import nextflow.trace.TraceObserver import nextflow.trace.TraceRecord import nextflow.util.CacheHelper import nextflow.util.PathNormalizer +import nextflow.util.SecretHelper import nextflow.util.TestOnly /** @@ -72,7 +69,7 @@ import nextflow.util.TestOnly @Slf4j @CompileStatic class LinObserver implements TraceObserver { - private static Map, String> TaskParamToValue = [ + private static Map, String> taskParamToValue = [ (StdOutParam) : "stdout", (StdInParam) : "stdin", (FileInParam) : "path", @@ -84,6 +81,7 @@ class LinObserver implements TraceObserver { (CmdEvalParam) : "eval", (EachInParam) : "each" ] + private String executionHash private LinStore store private Session session @@ -248,15 +246,15 @@ class LinObserver implements TraceObserver { } protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { - final codeChecksum = Checksum.ofNextflow(session.stubRun ? task.stubSource: task.source) + final codeChecksum = Checksum.ofNextflow(session.stubRun ? task.stubSource : task.source) final scriptChecksum = Checksum.ofNextflow(task.script) final value = new nextflow.lineage.model.TaskRun( session.uniqueId.toString(), task.getName(), codeChecksum, scriptChecksum, - task.inputs ? manageTaskInputParameters(task.inputs, normalizer): null, - task.isContainerEnabled() ? task.getContainerFingerprint(): null, + task.inputs ? manageTaskInputParameters(task.inputs, normalizer) : null, + task.isContainerEnabled() ? 
task.getContainerFingerprint() : null, normalizer.normalizePath(task.getCondaEnv()), normalizer.normalizePath(task.getSpackEnv()), task.config?.getArchitecture()?.toString(), @@ -375,6 +373,7 @@ class LinObserver implements TraceObserver { annotations.forEach { Object key, Object value -> converted.add(new Annotation(key.toString(), value)) } return converted } + String getSourceReference(Path source){ final hash = FileHelper.getTaskHashFromPath(source, session.workDir) if (hash) { @@ -397,7 +396,7 @@ class LinObserver implements TraceObserver { protected static String getParameterType(Object param) { if( param instanceof BaseParam ) - return TaskParamToValue.get(param.class) + return taskParamToValue.get(param.class) // return generic types if( param instanceof Path ) return Path.simpleName @@ -437,6 +436,7 @@ class LinObserver implements TraceObserver { void onFilePublish(Path destination, Path source, Map annotations){ storePublishedFile( destination, source, annotations) } + /** * Relativizes a path from the workflow's output dir. * @@ -463,7 +463,7 @@ class LinObserver implements TraceObserver { protected List manageTaskInputParameters(Map inputs, PathNormalizer normalizer) { List managedInputs = new LinkedList() - inputs.forEach{ param, value -> + inputs.forEach { param, value -> if( param instanceof FileInParam ) managedInputs.add( new Parameter( getParameterType(param), param.name, manageFileInParam( (List)value , normalizer) ) ) else if( !(param instanceof DefaultInParam) ) @@ -476,7 +476,7 @@ class LinObserver implements TraceObserver { final paths = new LinkedList(); for( FileHolder it : files ) { final ref = getSourceReference(it.storePath) - paths.add(ref ? 
ref : new DataPath( + paths.add(ref ?: new DataPath( normalizer.normalizePath(it.storePath), Checksum.ofNextflow(it.storePath)) ) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy index 35aa2ef1de..a620e55ada 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy @@ -29,26 +29,39 @@ import nextflow.lineage.model.WorkflowRun /** * Class to validate if the string refers to a property in the classes of the Lineage Metadata model. + * * @author Jorge Ejarque */ @CompileStatic class LinPropertyValidator { - private static List LID_MODEL_CLASSES = [Workflow, WorkflowRun, WorkflowOutputs, TaskRun, TaskOutputs, DataOutput, DataPath, Parameter, Checksum, Annotation] as List + private static final List LIN_MODEL_CLASSES = [ + Annotation, + Checksum, + DataOutput, + DataPath, + Parameter, + TaskOutputs, + TaskRun, + Workflow, + WorkflowOutputs, + WorkflowRun, + ] + private Set validProperties - LinPropertyValidator(){ + LinPropertyValidator() { this.validProperties = new HashSet() - for( Class clazz: LID_MODEL_CLASSES) { - for( MetaProperty field: clazz.metaClass.getProperties()) { + for( Class clazz : LIN_MODEL_CLASSES ) { + for( MetaProperty field : clazz.metaClass.getProperties() ) { validProperties.add( field.name) } } } void validate(Collection properties) { - for( String property: properties ) { - if( !(property in this.validProperties) ) { + for( String property : properties ) { + if( property !in this.validProperties ) { def msg = "Property '$property' doesn't exist in the lineage model." 
final matches = this.validProperties.closest(property) if( matches ) @@ -58,8 +71,8 @@ class LinPropertyValidator { } } - void validateQueryParams (Map params){ - for(String key: params.keySet()) { + void validateQueryParams(Map params) { + for( String key : params.keySet() ) { validate(key.tokenize('.')) } } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy index 79df01aa6d..c6c012a3c2 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy @@ -41,6 +41,7 @@ class LinUtils { /** * Query a lineage store. + * * @param store lineage store to query. * @param uri Query to perform in a URI-like format. * Format 'lid://[?QueryString][#fragment]' where: @@ -82,6 +83,7 @@ class LinUtils { /** * Get the array of the search path children elements from the fragment string + * * @param fragment String containing the elements separated by '.' * @return array with the parsed element */ @@ -95,6 +97,7 @@ class LinUtils { /** * Search for objects inside a description + * * @param store lineage store * @param key lineage key where to perform the search * @param params Parameter-value pairs to be evaluated in the key @@ -126,6 +129,7 @@ class LinUtils { /** * Get a metadata sub-object. + * * If the requested sub-object is the workflow or task outputs, retrieves the outputs from the outputs description. * * @param store Store to retrieve lineage metadata objects. @@ -158,6 +162,7 @@ class LinUtils { /** * Evaluates object or the objects in a collection matches a set of parameter-value pairs. It includes in the results collection in case of match. 
+ * * @param object Object or collection of objects to evaluate * @param params parameter-value pairs to evaluate in each object * @param results results collection to include the matching objects @@ -173,8 +178,10 @@ class LinUtils { results.add(object) } } + /** * Parses a query string and store them in parameter-value Map. + * * @param queryString URI-like query string. (e.g. param1=value1¶m2=value2). * @return Map containing the parameter-value pairs of the query string. */ @@ -210,7 +217,7 @@ class LinUtils { if( !value ) return false if( value instanceof Collection ) { - for( def v : value as Collection ) { + for( final v : value as Collection ) { if( v.toString() == expected.toString() ) return true } @@ -221,30 +228,32 @@ class LinUtils { /** * Retrieves the sub-object or value indicated by a path. + * * @param obj Object to navigate * @param path Elements path separated by '.' e.g. field.subfield * @return sub-object / value */ - static Object navigate(Object obj, String path){ + static Object navigate(Object obj, String path) { if (!obj) return null // type has been replaced by class when evaluating LidSerializable objects if (obj instanceof LinSerializable && path == 'type') - return obj.getClass()?.simpleName - try{ + return obj.getClass()?.simpleName + try { return path.tokenize('.').inject(obj) { current, key -> - return getSubPath(current, key) + getSubPath(current, key) } - } catch (Throwable e) { + } + catch (Throwable e) { log.debug("Error navigating to $path in object", e) return null } } private static Object getSubPath(current, String key) { - if (current == null) + if (current == null) { return null - + } if (current instanceof Map) { return current[key] // Navigate Map properties } @@ -258,19 +267,20 @@ class LinUtils { return null } - private static Object navigateCollection(Collection collection, String key) { - def results = [] - for (Object object: collection){ - final res = getSubPath(object, key) - if (res) results.add(res) - } - if 
(results.isEmpty() ) { - log.trace("No property found for $key") - return null - } - // Return a single object if only ine results is found. - return results.size() == 1 ? results[0] : results - } + private static Object navigateCollection(Collection collection, String key) { + final results = [] + for (Object object : collection) { + final res = getSubPath(object, key) + if (res) + results.add(res) + } + if (results.isEmpty() ) { + log.trace("No property found for $key") + return null + } + // Return a single object if only ine results is found. + return results.size() == 1 ? results[0] : results + } /** * Helper function to convert from FileTime to ISO 8601 with offset @@ -279,8 +289,8 @@ class LinUtils { * @param time File time to convert * @return The {@link OffsetDateTime} for the corresponding file time or null in case of not available (null) */ - static OffsetDateTime toDate(FileTime time){ - return time!=null + static OffsetDateTime toDate(FileTime time) { + return time != null ? 
time.toInstant().atZone(ZoneId.systemDefault()).toOffsetDateTime() : null } @@ -291,7 +301,7 @@ class LinUtils { * @param date ISO formated time * @return Converted FileTime or null if date is not available (null or 'N/A') */ - static FileTime toFileTime(OffsetDateTime date){ + static FileTime toFileTime(OffsetDateTime date) { if (!date) return null return FileTime.from(date.toInstant()) @@ -307,7 +317,7 @@ class LinUtils { * @return Output encoded as a JSON string */ static String encodeSearchOutputs(Object output, boolean prettyPrint) { - if (output instanceof LinSerializable){ + if (output instanceof LinSerializable) { return new LinEncoder().withPrettyPrint(prettyPrint).encode(output) } else { return new GsonEncoder() {} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index 2df2f3766f..f82548b0e7 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -16,8 +16,6 @@ package nextflow.lineage.cli -import nextflow.lineage.serde.LinEncoder - import static nextflow.lineage.fs.LinPath.* import java.nio.charset.StandardCharsets @@ -37,6 +35,7 @@ import nextflow.lineage.model.DataOutput import nextflow.lineage.model.Parameter import nextflow.lineage.model.TaskRun import nextflow.lineage.model.WorkflowRun +import nextflow.lineage.serde.LinEncoder import nextflow.script.params.FileInParam import nextflow.ui.TableBuilder import org.eclipse.jgit.diff.DiffAlgorithm @@ -51,6 +50,7 @@ import org.eclipse.jgit.diff.RawTextComparator */ @CompileStatic class LinCommandImpl implements CmdLineage.LinCommand { + private static Path DEFAULT_HTML_FILE = Path.of("lineage-render.html") @Canonical diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy index 
222385a917..5802e2351b 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy @@ -16,9 +16,6 @@ package nextflow.lineage.fs -import nextflow.lineage.LinStore -import nextflow.lineage.LinStoreFactory - import java.nio.file.FileStore import java.nio.file.FileSystem import java.nio.file.Path @@ -28,6 +25,8 @@ import java.nio.file.attribute.UserPrincipalLookupService import java.nio.file.spi.FileSystemProvider import groovy.transform.CompileStatic +import nextflow.lineage.LinStore +import nextflow.lineage.LinStoreFactory import nextflow.lineage.config.LineageConfig /** diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index 0a88f0a995..1d313efa1d 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -16,11 +16,13 @@ package nextflow.lineage.fs +import groovy.transform.CompileStatic import groovy.util.logging.Slf4j +import nextflow.file.FileHelper +import nextflow.file.LogicalDataPath import nextflow.lineage.model.Checksum import nextflow.lineage.model.DataOutput import nextflow.lineage.serde.LinSerializable -import nextflow.file.LogicalDataPath import nextflow.util.CacheHelper import nextflow.util.TestOnly @@ -36,9 +38,6 @@ import java.nio.file.WatchKey import java.nio.file.WatchService import java.time.OffsetDateTime -import groovy.transform.CompileStatic -import nextflow.file.FileHelper - /** * LID file system path * diff --git a/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy index 0fb590c21d..c109cc3931 100644 --- a/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy +++ b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy @@ -17,11 +17,10 @@ package 
nextflow.lineage.h2 -import groovy.json.JsonSlurper - import java.sql.Clob import com.zaxxer.hikari.HikariDataSource +import groovy.json.JsonSlurper import groovy.sql.Sql import groovy.transform.CompileStatic import groovy.util.logging.Slf4j @@ -142,6 +141,7 @@ class H2LinStore implements LinStore { } return results } + /** * JSON_MATCH implementation for h2 * @param jsonString @@ -159,7 +159,6 @@ class H2LinStore implements LinStore { dataSource.close() } - @TestOnly void truncateAllTables() { try(final sql=new Sql(dataSource)) { diff --git a/settings.gradle b/settings.gradle index 6e89b4728f..4d5abccb4d 100644 --- a/settings.gradle +++ b/settings.gradle @@ -28,7 +28,6 @@ include 'nf-commons' include 'nf-httpfs' include 'nf-lang' include 'nf-lineage' -include 'nf-lang' rootProject.children.each { prj -> prj.projectDir = new File("$rootDir/modules/$prj.name") From 6d71bc96d4c18ff4059f16413cfd1178210b082d Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 16 Apr 2025 22:09:19 -0500 Subject: [PATCH 59/72] replace nested ifs with if-guards Signed-off-by: Ben Sherman --- .../src/main/nextflow/file/FileHelper.groovy | 28 +++++++++---------- .../nextflow/lineage/LinHistoryRecord.groovy | 11 ++++---- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy index c72cdc2f87..e2f7651019 100644 --- a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy +++ b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy @@ -1174,20 +1174,20 @@ class FileHelper { public static HashCode getTaskHashFromPath(Path sourcePath, Path workPath) { assert sourcePath assert workPath - if (sourcePath.startsWith(workPath)) { - Path relativePath = workPath.relativize(sourcePath) - if (relativePath.getNameCount() >= 2) { - final bucket = relativePath.getName(0).toString() - if (bucket.size() == 2) { - final strHash = bucket + 
relativePath.getName(1).toString() - try { - return HashCode.fromString(strHash) - } catch (Throwable e) { - log.debug("String '${strHash}' is not a valid hash", e) - } - } - } + if( !sourcePath.startsWith(workPath) ) + return null + final relativePath = workPath.relativize(sourcePath) + if( relativePath.getNameCount() < 2 ) + return null + final bucket = relativePath.getName(0).toString() + if( bucket.size() != 2 ) + return null + final strHash = bucket + relativePath.getName(1).toString() + try { + return HashCode.fromString(strHash) + } catch (Throwable e) { + log.debug("String '${strHash}' is not a valid hash", e) + return null } - return null } } diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy index fe75519c13..366209215d 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy @@ -47,11 +47,12 @@ class LinHistoryRecord { protected LinHistoryRecord() {} List toList() { - def line = new ArrayList(4) - line << (timestamp ? TIMESTAMP_FMT.format(timestamp) : '-') - line << (runName ?: '-') - line << (sessionId.toString()) - line << (runLid ?: '-') + return List.of( + timestamp ? 
TIMESTAMP_FMT.format(timestamp) : '-', + runName ?: '-', + sessionId.toString(), + runLid ?: '-', + ) } @Override From 52e3333fc639dc6f36df8e2c616ddb72dad240d1 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 16 Apr 2025 22:09:53 -0500 Subject: [PATCH 60/72] change default render file name to `lineage.html` Signed-off-by: Ben Sherman --- .../src/main/nextflow/lineage/cli/LinCommandImpl.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index f82548b0e7..4ccc238638 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -51,7 +51,7 @@ import org.eclipse.jgit.diff.RawTextComparator @CompileStatic class LinCommandImpl implements CmdLineage.LinCommand { - private static Path DEFAULT_HTML_FILE = Path.of("lineage-render.html") + private static Path DEFAULT_HTML_FILE = Path.of("lineage.html") @Canonical static class Edge { From baed1ad9d38fb2983d9b580d42e5c3c72efe23a1 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 16 Apr 2025 22:12:31 -0500 Subject: [PATCH 61/72] fix typo Signed-off-by: Ben Sherman --- modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy | 4 ++-- .../nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy | 4 ++-- .../nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy index c6c012a3c2..22ea405dd0 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy @@ -56,7 +56,7 @@ class LinUtils { return globalSearch(store, uri) } else { final parameters = uri.query ? 
parseQuery(uri.query) : null - final children = parseChildrenFormFragment(uri.fragment) + final children = parseChildrenFromFragment(uri.fragment) return searchPath(store, key, parameters, children ) } } @@ -87,7 +87,7 @@ class LinUtils { * @param fragment String containing the elements separated by '.' * @return array with the parsed element */ - static String[] parseChildrenFormFragment(String fragment) { + static String[] parseChildrenFromFragment(String fragment) { if( !fragment ) return EMPTY_ARRAY final children = fragment.tokenize('.') diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index 1d313efa1d..cfd66c8547 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -448,7 +448,7 @@ class LinPath implements Path, LogicalDataPath { * @throws FileNotFoundException if the metadata associated to the LinPath does not exist or its type is not a DataOutput. 
*/ protected Path getTargetPath() { - return findTarget(fileSystem, filePath, false, parseChildrenFormFragment(fragment)) + return findTarget(fileSystem, filePath, false, parseChildrenFromFragment(fragment)) } /** @@ -457,7 +457,7 @@ class LinPath implements Path, LogicalDataPath { * @throws FileNotFoundException if the metadata associated to the LinPath does not exist */ protected Path getTargetOrMetadataPath(){ - return findTarget(fileSystem, filePath, true, parseChildrenFormFragment(fragment)) + return findTarget(fileSystem, filePath, true, parseChildrenFromFragment(fragment)) } @Override diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy index 363556632e..c471febb67 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy @@ -113,7 +113,7 @@ class LinUtilsTest extends Specification{ def "should parse children elements form Fragment string"() { expect: - LinUtils.parseChildrenFormFragment(FRAGMENT) == EXPECTED as String[] + LinUtils.parseChildrenFromFragment(FRAGMENT) == EXPECTED as String[] where: FRAGMENT | EXPECTED From e7f437bd33f6c4c6849b003c2dd132412f6f014b Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 16 Apr 2025 23:12:49 -0500 Subject: [PATCH 62/72] cleanup whitespace Signed-off-by: Ben Sherman --- .../nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index cfd66c8547..3f7d23dc2e 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -84,7 +84,6 @@ class LinPath implements Path, LogicalDataPath { this.filePath = filepath } - LinPath(LinFileSystem fs, String path) { this( 
fs, asUri( LID_PROT + norm0(path)) ) } @@ -138,11 +137,9 @@ class LinPath implements Path, LogicalDataPath { @TestOnly protected String getFilePath(){ this.filePath } - - /** - * Finds the target path of a LinPath. + * * @param fs LinFileSystem associated to the LinPath to find * @param filePath Path associated to the LinPath to find * @param resultsAsPath True to return metadata descriptions as LinMetadataPath @@ -441,6 +438,7 @@ class LinPath implements Path, LogicalDataPath { Path toTargetPath() { return getTargetOrMetadataPath() } + /** * Get the path associated to a DataOutput metadata. * @@ -453,6 +451,7 @@ class LinPath implements Path, LogicalDataPath { /** * Get the path associated to any metadata object. + * * @return Path associated to a DataOutput or LinMetadataFile with the metadata object for other types. * @throws FileNotFoundException if the metadata associated to the LinPath does not exist */ From 52664cebd28b7d1d6553c2e7cd3d7217d4e1164c Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 16 Apr 2025 23:19:26 -0500 Subject: [PATCH 63/72] don't wrap singleton file output as a list Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/extension/PublishOp.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index caa144e5c1..68c16aad46 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -97,7 +97,7 @@ class PublishOp { ? 
[saveAs: targetResolver] : [path: targetResolver] - if (publishOpts.annotations instanceof Closure){ + if( publishOpts.annotations instanceof Closure ) { final annotations = publishOpts.annotations as Closure overrides.annotations = annotations.call(value) as Map } @@ -262,7 +262,7 @@ class PublishOp { */ protected Object normalizePaths(value, targetResolver) { if( value instanceof Path ) { - return List.of(value.getBaseName(), normalizePath(value, targetResolver)) + return normalizePath(value, targetResolver) } if( value instanceof Collection ) { From 36d5c8211620aa95d5f8b37b42116b23c5c697e6 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 16 Apr 2025 23:39:31 -0500 Subject: [PATCH 64/72] fix failing test Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/util/CsvWriter.groovy | 13 ++++++++++++- .../groovy/nextflow/script/OutputDslTest.groovy | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy b/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy index c698676caf..b14b2cfe81 100644 --- a/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy @@ -62,14 +62,25 @@ class CsvWriter { ? 
record.subMap(columns).values() : record.values() } + else if( isSerializable(record) ) { + values = [ record ] + } else { - throw new IllegalArgumentException('Records must be list or map objects') + throw new IllegalArgumentException("Record of type `${record.class.name}` can not be serialized to CSV") } path << values.collect(v -> "\"${toCsvString(v)}\"").join(sep) << '\n' } } + private static boolean isSerializable(value) { + return value == null + || value instanceof Boolean + || value instanceof CharSequence + || value instanceof Number + || value instanceof Path + } + private static String toCsvString(value) { if( value == null ) return "" diff --git a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy index f938d24821..de538fd5bc 100644 --- a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy @@ -77,7 +77,7 @@ class OutputDslTest extends Specification { outputDir.resolve('foo/file1.txt').text == 'Hello' outputDir.resolve('barbar/file2.txt').text == 'world' outputDir.resolve('index.csv').text == """\ - "file2","${outputDir}/barbar/file2.txt" + "${outputDir}/barbar/file2.txt" """.stripIndent() and: 1 * session.notifyFilePublish(outputDir.resolve('foo/file1.txt'), file1, null) From 10e7e7e5d1fbd78001e37fa3c9c99257c7270596 Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 22 Apr 2025 11:20:42 +0200 Subject: [PATCH 65/72] remove unnecesary method in taskId Signed-off-by: jorgee --- .../nextflow/src/main/groovy/nextflow/processor/TaskId.groovy | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy index fbd4784a05..3576b9fde7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy +++ 
b/modules/nextflow/src/main/groovy/nextflow/processor/TaskId.groovy @@ -38,8 +38,6 @@ class TaskId extends Number implements Comparable, Serializable, Cloneable { private final int value - int getValue() { value } - static TaskId of( value ) { if( value instanceof Integer ) return new TaskId(value) From 119d2f8bd4f753486e0ec19fcf1696a5b1dde305 Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 22 Apr 2025 15:40:20 +0200 Subject: [PATCH 66/72] renames and small fixes Signed-off-by: jorgee --- .../groovy/nextflow/cli/CmdLineage.groovy | 6 +-- .../nextflow/extension/PublishOp.groovy | 7 ++- .../groovy/nextflow/cli/CmdLineageTest.groovy | 22 ++++---- .../main/nextflow/lineage/LinObserver.groovy | 31 ++++++------ .../lineage/LinPropertyValidator.groovy | 8 +-- .../src/main/nextflow/lineage/LinUtils.groovy | 4 +- .../lineage/cli/LinCommandImpl.groovy | 11 ++-- .../main/nextflow/lineage/fs/LinPath.groovy | 14 +++--- .../{DataOutput.groovy => FileOutput.groovy} | 2 +- .../{TaskOutputs.groovy => TaskOutput.groovy} | 4 +- .../nextflow/lineage/model/TaskRun.groovy | 6 +-- ...owOutputs.groovy => WorkflowOutput.groovy} | 6 +-- .../nextflow/lineage/model/WorkflowRun.groovy | 2 +- .../nextflow/lineage/serde/LinEncoder.groovy | 12 ++--- .../lineage/DefaultLinStoreTest.groovy | 14 +++--- .../nextflow/lineage/LinObserverTest.groovy | 50 +++++++++---------- .../lineage/LinPropertyValidationTest.groovy | 2 +- .../test/nextflow/lineage/LinUtilsTest.groovy | 16 +++--- .../lineage/cli/LinCommandImplTest.groovy | 42 ++++++++-------- .../fs/LinFileSystemProviderTest.groovy | 10 ++-- .../nextflow/lineage/fs/LinPathTest.groovy | 30 +++++------ .../lineage/serde/LinEncoderTest.groovy | 44 ++++++++-------- .../nextflow/lineage/h2/H2LinStoreTest.groovy | 12 ++--- 23 files changed, 179 insertions(+), 176 deletions(-) rename modules/nf-lineage/src/main/nextflow/lineage/model/{DataOutput.groovy => FileOutput.groovy} (97%) rename modules/nf-lineage/src/main/nextflow/lineage/model/{TaskOutputs.groovy 
=> TaskOutput.groovy} (94%) rename modules/nf-lineage/src/main/nextflow/lineage/model/{WorkflowOutputs.groovy => WorkflowOutput.groovy} (92%) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy index 7f53e0f49b..3c27b9fb81 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy @@ -152,12 +152,12 @@ class CmdLineage extends CmdBase implements UsageAware { @Override String getName() { - return 'log' + return 'list' } @Override String getDescription() { - return 'Print the lineage execution log' + return 'List the executions with lineage enabled' } @Override @@ -181,7 +181,7 @@ class CmdLineage extends CmdBase implements UsageAware { @Override String getName() { - return 'describe' + return 'view' } @Override diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index 68c16aad46..47f6429999 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -219,7 +219,12 @@ class PublishOp { else { log.warn "Invalid extension '${ext}' for index file '${indexPath}' -- should be CSV, JSON, or YAML" } - session.notifyFilePublish(indexPath, null, publishOpts.tags as Map) + def annotations = publishOpts.annotations + if( publishOpts.annotations instanceof Closure ) { + final annotationClosure = publishOpts.annotations as Closure + annotations = annotationClosure.call() as Map + } + session.notifyFilePublish(indexPath, null, annotations as Map) } log.trace "Publish operator complete" diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy index 3722f69eae..a7172abf9f 100644 --- 
a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy @@ -23,7 +23,7 @@ import nextflow.lineage.DefaultLinHistoryLog import nextflow.lineage.LinHistoryRecord import nextflow.lineage.LinStoreFactory import nextflow.lineage.model.Checksum -import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.FileOutput import nextflow.lineage.model.Parameter import nextflow.lineage.model.TaskRun import nextflow.lineage.serde.LinEncoder @@ -135,13 +135,13 @@ class CmdLineageTest extends Specification { } def time = OffsetDateTime.now() def encoder = new LinEncoder().withPrettyPrint(true) - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam","lid://12345/","lid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer lidFile.text = jsonSer when: - def lidCmd = new CmdLineage(launcher: launcher, args: ["describe", "lid://12345"]) + def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid://12345"]) lidCmd.run() def stdout = capture .toString() @@ -168,7 +168,7 @@ class CmdLineageTest extends Specification { } when: - def lidCmd = new CmdLineage(launcher: launcher, args: ["describe", "lid://12345"]) + def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid://12345"]) lidCmd.run() def stdout = capture .toString() @@ -206,10 +206,10 @@ class CmdLineageTest extends Specification { Files.createDirectories(lidFile5.parent) def encoder = new LinEncoder() def time = OffsetDateTime.now() - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://45678",null, 1234, time, time, null) lidFile.text = encoder.encode(entry) - 
entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987", "lid://45678", "lid://123987", 1234, time, time, null) lidFile2.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "foo", @@ -219,7 +219,7 @@ class CmdLineageTest extends Specification { new Parameter("path","reads",["lid://45678/output.txt"])], null, null, null, null, [:],[], null) lidFile3.text = encoder.encode(entry) - entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://45678", "lid://45678", null, 1234, time, time, null) lidFile4.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "bar", @@ -277,13 +277,13 @@ class CmdLineageTest extends Specification { } def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.now() - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://12345", "lid://123987/", 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer lidFile.text = jsonSer when: - def lidCmd = new CmdLineage(launcher: launcher, args: ["describe", "lid:///?type=DataOutput"]) + def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid:///?type=DataOutput"]) lidCmd.run() def stdout = capture .toString() @@ -312,13 +312,13 @@ class CmdLineageTest extends Specification { } def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.now() - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://12345", "lid://123987/", 1234, time, 
time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer lidFile.text = jsonSer when: - def lidCmd = new CmdLineage(launcher: launcher, args: ["describe", "lid:///?type=DataOutput"]) + def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid:///?type=DataOutput"]) lidCmd.run() def stdout = capture .toString() diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy index 7b4402e570..081a5bba7a 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy @@ -28,12 +28,12 @@ import groovy.util.logging.Slf4j import nextflow.Session import nextflow.lineage.model.Annotation import nextflow.lineage.model.Checksum -import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.FileOutput import nextflow.lineage.model.DataPath import nextflow.lineage.model.Parameter -import nextflow.lineage.model.TaskOutputs +import nextflow.lineage.model.TaskOutput import nextflow.lineage.model.Workflow -import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowOutput import nextflow.lineage.model.WorkflowRun import nextflow.file.FileHelper import nextflow.file.FileHolder @@ -85,7 +85,7 @@ class LinObserver implements TraceObserver { private String executionHash private LinStore store private Session session - private WorkflowOutputs workflowOutputs + private WorkflowOutput workflowOutput private Map outputsStoreDirLid = new HashMap(10) private PathNormalizer normalizer @@ -113,7 +113,7 @@ class LinObserver implements TraceObserver { normalizer = new PathNormalizer(session.workflowMetadata) executionHash = storeWorkflowRun(normalizer) final executionUri = asUriString(executionHash) - workflowOutputs = new WorkflowOutputs( + workflowOutput = new WorkflowOutput( OffsetDateTime.now(), executionUri, new LinkedList() @@ -123,10 +123,10 @@ class LinObserver 
implements TraceObserver { @Override void onFlowComplete(){ - if (this.workflowOutputs){ - workflowOutputs.createdAt = OffsetDateTime.now() - final key = executionHash + '#outputs' - this.store.save(key, workflowOutputs) + if (this.workflowOutput){ + workflowOutput.createdAt = OffsetDateTime.now() + final key = executionHash + '#output' + this.store.save(key, workflowOutput) } } @@ -195,8 +195,8 @@ class LinObserver implements TraceObserver { protected String storeTaskResults(TaskRun task, PathNormalizer normalizer){ final outputParams = getNormalizedTaskOutputs(task, normalizer) - final value = new TaskOutputs( asUriString(task.hash.toString()), asUriString(executionHash), OffsetDateTime.now(), outputParams ) - final key = task.hash.toString() + '#outputs' + final value = new TaskOutput( asUriString(task.hash.toString()), asUriString(executionHash), OffsetDateTime.now(), outputParams ) + final key = task.hash.toString() + '#output' store.save(key,value) return key } @@ -247,12 +247,11 @@ class LinObserver implements TraceObserver { protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { final codeChecksum = Checksum.ofNextflow(session.stubRun ? task.stubSource : task.source) - final scriptChecksum = Checksum.ofNextflow(task.script) final value = new nextflow.lineage.model.TaskRun( session.uniqueId.toString(), task.getName(), codeChecksum, - scriptChecksum, + task.script, task.inputs ? manageTaskInputParameters(task.inputs, normalizer) : null, task.isContainerEnabled() ? 
task.getContainerFingerprint() : null, normalizer.normalizePath(task.getCondaEnv()), @@ -277,7 +276,7 @@ class LinObserver implements TraceObserver { final attrs = readAttributes(path) final key = getTaskOutputKey(task, path) final checksum = Checksum.ofNextflow(path) - final value = new DataOutput( + final value = new FileOutput( path.toUriString(), checksum, asUriString(task.hash.toString()), @@ -350,7 +349,7 @@ class LinObserver implements TraceObserver { final key = getWorkflowOutputKey(destination) final sourceReference = source ? getSourceReference(source) : asUriString(executionHash) final attrs = readAttributes(destination) - final value = new DataOutput( + final value = new FileOutput( destination.toUriString(), checksum, sourceReference, @@ -391,7 +390,7 @@ class LinObserver implements TraceObserver { @Override void onWorkflowPublish(String name, Object value){ - workflowOutputs.outputs.add(new Parameter(getParameterType(value), name, convertPathsToLidReferences(value))) + workflowOutput.output.add(new Parameter(getParameterType(value), name, convertPathsToLidReferences(value))) } protected static String getParameterType(Object param) { diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy index a620e55ada..a89f7fc692 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy @@ -21,10 +21,10 @@ import nextflow.lineage.model.Annotation import nextflow.lineage.model.Checksum import nextflow.lineage.model.DataPath import nextflow.lineage.model.Parameter -import nextflow.lineage.model.TaskOutputs +import nextflow.lineage.model.TaskOutput import nextflow.lineage.model.TaskRun import nextflow.lineage.model.Workflow -import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowOutput import nextflow.lineage.model.WorkflowRun /** 
@@ -41,10 +41,10 @@ class LinPropertyValidator { DataOutput, DataPath, Parameter, - TaskOutputs, + TaskOutput, TaskRun, Workflow, - WorkflowOutputs, + WorkflowOutput, WorkflowRun, ] diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy index 22ea405dd0..dfb5a4e634 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy @@ -141,7 +141,7 @@ class LinUtils { static Object getSubObject(LinStore store, String key, LinSerializable object, String[] children) { if( isSearchingOutputs(object, children) ) { // When asking for a Workflow or task output retrieve the outputs description - final outputs = store.load("${key}#outputs") + final outputs = store.load("${key}#output") if (!outputs) return null return navigate(outputs, children.join('.')) @@ -157,7 +157,7 @@ class LinUtils { * @return return 'true' if the parent is a Task/Workflow run and the first element in children is 'outputs'. 
Otherwise 'false' */ static boolean isSearchingOutputs(LinSerializable object, String[] children) { - return (object instanceof WorkflowRun || object instanceof TaskRun) && children && children[0] == 'outputs' + return (object instanceof WorkflowRun || object instanceof TaskRun) && children && children[0] == 'output' } /** diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index 4ccc238638..46202a9d51 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -31,12 +31,11 @@ import nextflow.lineage.LinHistoryRecord import nextflow.lineage.LinStore import nextflow.lineage.LinStoreFactory import nextflow.lineage.LinUtils -import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.FileOutput import nextflow.lineage.model.Parameter import nextflow.lineage.model.TaskRun import nextflow.lineage.model.WorkflowRun import nextflow.lineage.serde.LinEncoder -import nextflow.script.params.FileInParam import nextflow.ui.TableBuilder import org.eclipse.jgit.diff.DiffAlgorithm import org.eclipse.jgit.diff.DiffFormatter @@ -156,8 +155,8 @@ class LinCommandImpl implements CmdLineage.LinCommand { final key = nodeToRender.substring(LID_PROT.size()) final lidObject = store.load(key) switch (lidObject.getClass()) { - case DataOutput: - processDataOutput(lidObject as DataOutput, lines, nodeToRender, nodes, edges) + case FileOutput: + processDataOutput(lidObject as FileOutput, lines, nodeToRender, nodes, edges) break; case WorkflowRun: @@ -175,7 +174,7 @@ class LinCommandImpl implements CmdLineage.LinCommand { private void processTaskRun(TaskRun taskRun, List lines, String nodeToRender, LinkedList nodes, LinkedList edges) { lines << " ${nodeToRender}@{shape: process, label: \"${taskRun.name} [$nodeToRender]\"}".toString() - final parameters = taskRun.inputs + final 
parameters = taskRun.input for (Parameter source : parameters) { if (source.type.equals("path")) { manageFileInParam(lines, nodeToRender, nodes, edges, source.value) @@ -199,7 +198,7 @@ class LinCommandImpl implements CmdLineage.LinCommand { } } - private void processDataOutput(DataOutput lidObject, List lines, String nodeToRender, LinkedList nodes, LinkedList edges){ + private void processDataOutput(FileOutput lidObject, List lines, String nodeToRender, LinkedList nodes, LinkedList edges){ lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString(); final source = lidObject.source if(! source ) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index 3f7d23dc2e..0294f1501e 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -21,7 +21,7 @@ import groovy.util.logging.Slf4j import nextflow.file.FileHelper import nextflow.file.LogicalDataPath import nextflow.lineage.model.Checksum -import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.FileOutput import nextflow.lineage.serde.LinSerializable import nextflow.util.CacheHelper import nextflow.util.TestOnly @@ -109,7 +109,7 @@ class LinPath implements Path, LogicalDataPath { return first } - protected static void validateDataOutput(DataOutput lidObject) { + protected static void validateDataOutput(FileOutput lidObject) { final hashedPath = FileHelper.toCanonicalPath(lidObject.path as String) if( !hashedPath.exists() ) throw new FileNotFoundException("Target path $lidObject.path does not exist") @@ -159,7 +159,7 @@ class LinPath implements Path, LogicalDataPath { throw new Exception("Lineage store not found - Check Nextflow configuration") final object = store.load(filePath) if ( object ){ - if( object instanceof DataOutput ) { + if( object instanceof FileOutput ) { return 
getTargetPathFromOutput(object, children) } if( resultsAsPath ){ @@ -204,9 +204,9 @@ class LinPath implements Path, LogicalDataPath { static LinMetadataPath getSubObjectAsPath(LinFileSystem fs, String key, LinSerializable object, String[] children) { if( isSearchingOutputs(object, children) ) { // When asking for a Workflow or task output retrieve the outputs description - final outputs = fs.store.load("${key}/outputs") + final outputs = fs.store.load("${key}/output") if( !outputs ) { - throw new FileNotFoundException("Target path '$key#outputs' does not exist") + throw new FileNotFoundException("Target path '$key#output' does not exist") } return generateLinMetadataPath(fs, key, outputs, children) } @@ -224,8 +224,8 @@ class LinPath implements Path, LogicalDataPath { return new LinMetadataPath(encodeSearchOutputs(output, true), creationTime, fs, key, children) } - private static Path getTargetPathFromOutput(DataOutput object, String[] children) { - final lidObject = object as DataOutput + private static Path getTargetPathFromOutput(FileOutput object, String[] children) { + final lidObject = object as FileOutput // return the real path stored in the metadata validateDataOutput(lidObject) def realPath = FileHelper.toCanonicalPath(lidObject.path as String) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/DataOutput.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/FileOutput.groovy similarity index 97% rename from modules/nf-lineage/src/main/nextflow/lineage/model/DataOutput.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/FileOutput.groovy index 41edf3c501..439a07c780 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/model/DataOutput.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/FileOutput.groovy @@ -29,7 +29,7 @@ import java.time.OffsetDateTime */ @Canonical @CompileStatic -class DataOutput implements LinSerializable { +class FileOutput implements LinSerializable { /** * Real path of the output 
data. */ diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/TaskOutputs.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/TaskOutput.groovy similarity index 94% rename from modules/nf-lineage/src/main/nextflow/lineage/model/TaskOutputs.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/TaskOutput.groovy index c995044ea8..6af10f4e0f 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/model/TaskOutputs.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/TaskOutput.groovy @@ -29,7 +29,7 @@ import java.time.OffsetDateTime */ @Canonical @CompileStatic -class TaskOutputs implements LinSerializable { +class TaskOutput implements LinSerializable { /** * Reference to the task that generated the data. */ @@ -45,7 +45,7 @@ class TaskOutputs implements LinSerializable { /** * Outputs of the task */ - List outputs + List output /** * Annotations attached to the task outputs */ diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/TaskRun.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/TaskRun.groovy index ec9c35af0c..ebcc7c4b36 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/model/TaskRun.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/TaskRun.groovy @@ -43,11 +43,11 @@ class TaskRun implements LinSerializable { /** * Checksum of the task script */ - Checksum scriptChecksum + String script /** - * Task run inputs + * Task run input */ - List inputs + List input /** * Container used for the task run */ diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutputs.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutput.groovy similarity index 92% rename from modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutputs.groovy rename to modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutput.groovy index e9d727bdd7..134e2cebed 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutputs.groovy +++ 
b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutput.groovy @@ -29,7 +29,7 @@ import java.time.OffsetDateTime */ @Canonical @CompileStatic -class WorkflowOutputs implements LinSerializable { +class WorkflowOutput implements LinSerializable { /** * Creation date of the workflow outputs description */ @@ -39,9 +39,9 @@ class WorkflowOutputs implements LinSerializable { */ String workflowRun /** - * Workflow outputs + * Workflow output */ - List outputs + List output /** * Annotations attached to the workflow outputs */ diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowRun.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowRun.groovy index 0bfc615863..2676432ed8 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowRun.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowRun.groovy @@ -47,7 +47,7 @@ class WorkflowRun implements LinSerializable { /** * Resolved Configuration */ - Map resolvedConfig + Map config /** * Annotations attached to the workflow run */ diff --git a/modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy b/modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy index 7c50d27d52..334696e858 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy @@ -17,11 +17,11 @@ package nextflow.lineage.serde import groovy.transform.CompileStatic -import nextflow.lineage.model.DataOutput -import nextflow.lineage.model.TaskOutputs +import nextflow.lineage.model.FileOutput +import nextflow.lineage.model.TaskOutput import nextflow.lineage.model.TaskRun import nextflow.lineage.model.Workflow -import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowOutput import nextflow.lineage.model.WorkflowRun import nextflow.serde.gson.GsonEncoder import nextflow.serde.gson.RuntimeTypeAdapterFactory @@ -43,11 +43,11 @@ class LinEncoder 
extends GsonEncoder { static RuntimeTypeAdapterFactory newLidTypeAdapterFactory(){ RuntimeTypeAdapterFactory.of(LinSerializable.class, "type") .registerSubtype(WorkflowRun, WorkflowRun.simpleName) - .registerSubtype(WorkflowOutputs, WorkflowOutputs.simpleName) + .registerSubtype(WorkflowOutput, WorkflowOutput.simpleName) .registerSubtype(Workflow, Workflow.simpleName) .registerSubtype(TaskRun, TaskRun.simpleName) - .registerSubtype(TaskOutputs, TaskOutputs.simpleName) - .registerSubtype(DataOutput, DataOutput.simpleName) + .registerSubtype(TaskOutput, TaskOutput.simpleName) + .registerSubtype(FileOutput, FileOutput.simpleName) } } diff --git a/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy index e2443d27ad..db135923d4 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy @@ -26,7 +26,7 @@ import java.time.ZoneOffset import nextflow.lineage.model.Checksum import nextflow.lineage.model.DataPath -import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.FileOutput import nextflow.lineage.model.Parameter import nextflow.lineage.model.Workflow import nextflow.lineage.model.WorkflowRun @@ -70,7 +70,7 @@ class DefaultLinStoreTest extends Specification { def "save should store value in the correct file location"() { given: def key = "testKey" - def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid://task", 1234) + def value = new FileOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid://task", 1234) def lidStore = new DefaultLinStore() lidStore.open(config) @@ -86,7 +86,7 @@ class DefaultLinStoreTest extends Specification { def "load should retrieve stored value correctly"() { given: def key = 
"testKey" - def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid://task", 1234) + def value = new FileOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid://task", 1234) def lidStore = new DefaultLinStore() lidStore.open(config) lidStore.save(key, value) @@ -113,11 +113,11 @@ class DefaultLinStoreTest extends Specification { def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [ new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] ) def key2 = "testKey2" - def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key1","value1"), new Annotation("key2","value2")]) + def value2 = new FileOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key1","value1"), new Annotation("key2","value2")]) def key3 = "testKey3" - def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key2","value2"), new Annotation("key3","value3")]) + def value3 = new FileOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key2","value2"), new Annotation("key3","value3")]) def key4 = "testKey4" - def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key4","value4"), new Annotation("key3","value3")]) + def value4 = new FileOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key4","value4"), new Annotation("key3","value3")]) def lidStore 
= new DefaultLinStore() lidStore.open(config) @@ -127,7 +127,7 @@ class DefaultLinStoreTest extends Specification { lidStore.save(key4, value4) when: - def results = lidStore.search("type=DataOutput&annotations.key=key2&annotations.value=value2") + def results = lidStore.search("type=FileOutput&annotations.key=key2&annotations.value=value2") then: results.size() == 2 results.keySet().containsAll([key2,key3]) diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy index 0695f07806..04610fff2b 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy @@ -18,7 +18,7 @@ package nextflow.lineage import nextflow.lineage.model.Parameter -import nextflow.lineage.model.TaskOutputs +import nextflow.lineage.model.TaskOutput import nextflow.file.FileHolder import nextflow.processor.TaskHandler import nextflow.script.TokenVar @@ -42,10 +42,10 @@ import java.nio.file.attribute.BasicFileAttributes import com.google.common.hash.HashCode import nextflow.Session import nextflow.lineage.model.Checksum -import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.FileOutput import nextflow.lineage.model.DataPath import nextflow.lineage.model.Workflow -import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowOutput import nextflow.lineage.model.WorkflowRun import nextflow.lineage.serde.LinEncoder import nextflow.lineage.config.LineageConfig @@ -289,42 +289,42 @@ class LinObserverTest extends Specification { and: 'Expected LID objects' def sourceHash = CacheHelper.hasher('echo task source').hash().toString() - def scriptHash = CacheHelper.hasher('this is the script').hash().toString() + def script = 'this is the script' def taskDescription = new nextflow.lineage.model.TaskRun(uniqueId.toString(), "foo", new Checksum(sourceHash, "nextflow", "standard"), - new 
Checksum(scriptHash, "nextflow", "standard"), + script, [ new Parameter("path", "file1", ['lid://78567890/file1.txt']), new Parameter("path", "file2", [[path: normalizer.normalizePath(file), checksum: [value:fileHash, algorithm: "nextflow", mode: "standard"]]]), new Parameter("val", "id", "value") ], null, null, null, null, [:], [], "lid://hash", null) - def dataOutput1 = new DataOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), + def dataOutput1 = new FileOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), "lid://1234567890", "lid://hash", "lid://1234567890", attrs1.size(), LinUtils.toDate(attrs1.creationTime()), LinUtils.toDate(attrs1.lastModifiedTime()) ) - def dataOutput2 = new DataOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), + def dataOutput2 = new FileOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), "lid://1234567890", "lid://hash", "lid://1234567890", attrs2.size(), LinUtils.toDate(attrs2.creationTime()), LinUtils.toDate(attrs2.lastModifiedTime()) ) when: observer.onProcessComplete(handler, null ) def taskRunResult = store.load("${hash.toString()}") - def dataOutputResult1 = store.load("${hash}/fileOut1.txt") as DataOutput - def dataOutputResult2 = store.load("${hash}/fileOut2.txt") as DataOutput - def taskOutputsResult = store.load("${hash}#outputs") as TaskOutputs + def dataOutputResult1 = store.load("${hash}/fileOut1.txt") as FileOutput + def dataOutputResult2 = store.load("${hash}/fileOut2.txt") as FileOutput + def taskOutputsResult = store.load("${hash}#output") as TaskOutput then: taskRunResult == taskDescription dataOutputResult1 == dataOutput1 dataOutputResult2 == dataOutput2 taskOutputsResult.taskRun == "lid://1234567890" taskOutputsResult.workflowRun == "lid://hash" - taskOutputsResult.outputs.size() == 3 - taskOutputsResult.outputs.get(0).type == "path" - taskOutputsResult.outputs.get(0).name == "file1" - 
taskOutputsResult.outputs.get(0).value == "lid://1234567890/fileOut1.txt" - taskOutputsResult.outputs.get(1).type == "path" - taskOutputsResult.outputs.get(1).name == "file2" - taskOutputsResult.outputs.get(1).value == ["lid://1234567890/fileOut2.txt"] - taskOutputsResult.outputs.get(2).type == "val" - taskOutputsResult.outputs.get(2).name == "id" - taskOutputsResult.outputs.get(2).value == "value" + taskOutputsResult.output.size() == 3 + taskOutputsResult.output.get(0).type == "path" + taskOutputsResult.output.get(0).name == "file1" + taskOutputsResult.output.get(0).value == "lid://1234567890/fileOut1.txt" + taskOutputsResult.output.get(1).type == "path" + taskOutputsResult.output.get(1).name == "file2" + taskOutputsResult.output.get(1).value == ["lid://1234567890/fileOut2.txt"] + taskOutputsResult.output.get(2).type == "val" + taskOutputsResult.output.get(2).name == "id" + taskOutputsResult.output.get(2).value == "value" cleanup: folder?.deleteDir() @@ -360,7 +360,7 @@ class LinObserverTest extends Specification { } and: def attrs = Files.readAttributes(outFile, BasicFileAttributes) - def output = new DataOutput(outFile.toString(), new Checksum(fileHash, "nextflow", "standard"), + def output = new FileOutput(outFile.toString(), new Checksum(fileHash, "nextflow", "standard"), "lid://15cd5b07", "lid://hash", "lid://15cd5b07", attrs.size(), LinUtils.toDate(attrs.creationTime()), LinUtils.toDate(attrs.lastModifiedTime()) ) and: observer.readAttributes(outFile) >> attrs @@ -528,7 +528,7 @@ class LinObserverTest extends Specification { then: 'check file 1 output metadata in lid store' def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes) def fileHash1 = CacheHelper.hasher(outFile1).hash().toString() - def output1 = new DataOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), + def output1 = new FileOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"), "lid://123987/file.bam", 
"$LID_PROT${observer.executionHash}", null, attrs1.size(), LinUtils.toDate(attrs1.creationTime()), LinUtils.toDate(attrs1.lastModifiedTime()) ) folder.resolve(".meta/${observer.executionHash}/foo/file.bam/.data.json").text == encoder.encode(output1) @@ -542,7 +542,7 @@ class LinObserverTest extends Specification { observer.onFilePublish(outFile2) observer.onWorkflowPublish("b", outFile2) then: 'Check outFile2 metadata in lid store' - def output2 = new DataOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), + def output2 = new FileOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"), "lid://${observer.executionHash}" , "lid://${observer.executionHash}", null, attrs2.size(), LinUtils.toDate(attrs2.creationTime()), LinUtils.toDate(attrs2.lastModifiedTime()) ) folder.resolve(".meta/${observer.executionHash}/foo/file2.bam/.data.json").text == encoder.encode(output2) @@ -551,8 +551,8 @@ class LinObserverTest extends Specification { observer.onFlowComplete() then: 'Check history file is updated and Workflow Result is written in the lid store' def finalLid = store.getHistoryLog().getRecord(uniqueId).runLid.substring(LID_PROT.size()) - def resultsRetrieved = store.load("${finalLid}#outputs") as WorkflowOutputs - resultsRetrieved.outputs == [new Parameter(Path.simpleName, "a", "lid://${observer.executionHash}/foo/file.bam"), new Parameter(Path.simpleName, "b", "lid://${observer.executionHash}/foo/file2.bam")] + def resultsRetrieved = store.load("${finalLid}#output") as WorkflowOutput + resultsRetrieved.output == [new Parameter(Path.simpleName, "a", "lid://${observer.executionHash}/foo/file.bam"), new Parameter(Path.simpleName, "b", "lid://${observer.executionHash}/foo/file2.bam")] cleanup: folder?.deleteDir() diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy index 98038aadf8..70987aabcb 100644 --- 
a/modules/nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy @@ -32,7 +32,7 @@ class LinPropertyValidationTest extends Specification{ def 'should not throw exception when property exist'(){ when: - new LinPropertyValidator().validate(['value', 'outputs']) + new LinPropertyValidator().validate(['value', 'output']) then: noExceptionThrown() } diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy index c471febb67..47d1247c4a 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy @@ -22,7 +22,7 @@ import nextflow.lineage.model.Checksum import nextflow.lineage.model.DataPath import nextflow.lineage.model.Parameter import nextflow.lineage.model.Workflow -import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowOutput import nextflow.lineage.model.WorkflowRun import nextflow.lineage.config.LineageConfig import spock.lang.Specification @@ -74,11 +74,11 @@ class LinUtilsTest extends Specification{ def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) - def outputs1 = new WorkflowOutputs(OffsetDateTime.now(), "lid://testKey", [new Parameter( "String", "output", "name")] ) + def outputs1 = new WorkflowOutput(OffsetDateTime.now(), "lid://testKey", [new Parameter( "String", "output", "name")] ) def lidStore = new DefaultLinStore() lidStore.open(config) lidStore.save(key, value1) - lidStore.save("$key#outputs", outputs1) + lidStore.save("$key#output", outputs1) when: List params = LinUtils.query(lidStore, new URI('lid://testKey#params')) @@ -88,7 +88,7 @@ 
class LinUtilsTest extends Specification{ (params[0] as List).size() == 2 when: - List outputs = LinUtils.query(lidStore, new URI('lid://testKey#outputs')) + List outputs = LinUtils.query(lidStore, new URI('lid://testKey#output')) then: outputs.size() == 1 outputs[0] instanceof List @@ -137,7 +137,7 @@ class LinUtilsTest extends Specification{ def "should check params in an object"() { given: - def obj = [ "type": "value", "workflow": ["repository": "subvalue"], "outputs" : [ ["path":"/to/file"],["path":"file2"] ] ] + def obj = [ "type": "value", "workflow": ["repository": "subvalue"], "output" : [ ["path":"/to/file"],["path":"file2"] ] ] expect: LinUtils.checkParams(obj, PARAMS) == EXPECTED @@ -148,9 +148,9 @@ class LinUtilsTest extends Specification{ ["type": "wrong"] | false ["workflow.repository": "subvalue"] | true ["workflow.repository": "wrong"] | false - ["outputs.path": "wrong"] | false - ["outputs.path": "/to/file"] | true - ["outputs.path": "file2"] | true + ["output.path": "wrong"] | false + ["output.path": "/to/file"] | true + ["output.path": "file2"] | true } diff --git a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy index 39958481bd..d9147a8662 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy @@ -23,7 +23,7 @@ import nextflow.lineage.LinHistoryRecord import nextflow.lineage.LinStoreFactory import nextflow.lineage.DefaultLinHistoryLog import nextflow.lineage.model.Checksum -import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.FileOutput import nextflow.lineage.model.DataPath import nextflow.lineage.model.Parameter import nextflow.lineage.model.TaskRun @@ -119,7 +119,7 @@ class LinCommandImplTest extends Specification{ Files.createDirectories(lidFile.parent) def time = 
OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) def encoder = new LinEncoder().withPrettyPrint(true) - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam","lid://123987/", null, 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer @@ -172,27 +172,27 @@ class LinCommandImplTest extends Specification{ Files.createDirectories(lidFile5.parent) def encoder = new LinEncoder() def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://45678", null, 1234, time, time, null) lidFile.text = encoder.encode(entry) - entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987", "lid://45678", "lid://123987", 1234, time, time, null) lidFile2.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "foo", new Checksum("abcde2345","nextflow","standard"), - new Checksum("abfsc2375","nextflow","standard"), + 'this is a script', [new Parameter( "val", "sample_id","ggal_gut"), new Parameter("path","reads", ["lid://45678/output.txt"] ), new Parameter("path","input", [new DataPath("path/to/file",new Checksum("45372qe","nextflow","standard"))]) ], null, null, null, null, [:],[], null) lidFile3.text = encoder.encode(entry) - entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://45678", "lid://45678", null, 1234, time, time, null) lidFile4.text = encoder.encode(entry) entry = new 
TaskRun("u345-2346-1stw2", "bar", new Checksum("abfs2556","nextflow","standard"), - new Checksum("abfsc2375","nextflow","standard"), + 'this is a script', null,null, null, null, null, [:],[], null) lidFile5.text = encoder.encode(entry) final network = """flowchart BT @@ -241,7 +241,7 @@ class LinCommandImplTest extends Specification{ Files.createDirectories(lidFile3.parent) def encoder = new LinEncoder() def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://12345", "lid://12345", null, 1234, time, time, null) lidFile.text = encoder.encode(entry) def wf = new Workflow([new DataPath("/path/to/main.nf)")], "hello-nf", "aasdklk") @@ -284,13 +284,13 @@ class LinCommandImplTest extends Specification{ Files.createDirectories(lidFile.parent) def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) def jsonSer = encoder.encode(entry) def expectedOutput = jsonSer lidFile.text = jsonSer when: - new LinCommandImpl().describe(configMap, ["lid:///?type=DataOutput"]) + new LinCommandImpl().describe(configMap, ["lid:///?type=FileOutput"]) def stdout = capture .toString() .readLines()// remove the log part @@ -311,16 +311,16 @@ class LinCommandImplTest extends Specification{ Files.createDirectories(lidFile2.parent) def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) - def entry = new DataOutput("path/to/file",new 
Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) - def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), + def entry2 = new FileOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) def expectedOutput1 = '[\n "path/to/file",\n "path/to/file2"\n]' def expectedOutput2 = '[\n "path/to/file2",\n "path/to/file"\n]' lidFile.text = encoder.encode(entry) lidFile2.text = encoder.encode(entry2) when: - new LinCommandImpl().describe(configMap, ["lid:///?type=DataOutput#path"]) + new LinCommandImpl().describe(configMap, ["lid:///?type=FileOutput#path"]) def stdout = capture .toString() .readLines()// remove the log part @@ -340,9 +340,9 @@ class LinCommandImplTest extends Specification{ Files.createDirectories(lidFile2.parent) def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) - def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), + def entry2 = new FileOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) lidFile.text = encoder.encode(entry) lidFile2.text = encoder.encode(entry2) @@ -351,7 +351,7 @@ class LinCommandImplTest extends Specification{ +++ 67890 @@ -1,15 +1,15 @@ { - "type": "DataOutput", + "type": "FileOutput", - "path": "path/to/file", + "path": "path/to/file2", "checksum": { @@ -390,7 +390,7 @@ class LinCommandImplTest 
extends Specification{ Files.createDirectories(lidFile.parent) def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) lidFile.text = encoder.encode(entry) @@ -414,7 +414,7 @@ class LinCommandImplTest extends Specification{ when: def config = new ConfigMap() new LinCommandImpl().log(config) - new LinCommandImpl().describe(config, ["lid:///?type=DataOutput"]) + new LinCommandImpl().describe(config, ["lid:///?type=FileOutput"]) new LinCommandImpl().render(config, ["lid://12345", "output.html"]) new LinCommandImpl().diff(config, ["lid://89012", "lid://12345"]) @@ -441,16 +441,16 @@ class LinCommandImplTest extends Specification{ Files.createDirectories(lidFile2.parent) def encoder = new LinEncoder().withPrettyPrint(true) def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) - def entry = new DataOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) - def entry2 = new DataOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), + def entry2 = new FileOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) def expectedOutput1 = '[\n "lid://123987/file.bam",\n "lid://123987/file2.bam"\n]' def expectedOutput2 = '[\n "lid://123987/file2.bam",\n "lid://123987/file.bam"\n]' lidFile.text = encoder.encode(entry) lidFile2.text = encoder.encode(entry2) when: - new LinCommandImpl().find(configMap, ["type=DataOutput"]) + new 
LinCommandImpl().find(configMap, ["type=FileOutput"]) def stdout = capture .toString() .readLines()// remove the log part diff --git a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy index 97b67dccc2..108739c2eb 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy @@ -129,7 +129,7 @@ class LinFileSystemProviderTest extends Specification { def output = data.resolve("output.txt") output.text = "Hello, World!" outputMeta.mkdirs() - outputMeta.resolve(".data.json").text = '{"type":"DataOutput","path":"'+output.toString()+'"}' + outputMeta.resolve(".data.json").text = '{"type":"FileOutput","path":"'+output.toString()+'"}' Global.session = Mock(Session) { getConfig()>>config } and: @@ -244,7 +244,7 @@ class LinFileSystemProviderTest extends Specification { def output = data.resolve("output.txt") output.text = "Hello, World!" 
outputMeta.mkdirs() - outputMeta.resolve(".data.json").text = '{"type":"DataOutput","path":"'+output.toString()+'"}' + outputMeta.resolve(".data.json").text = '{"type":"FileOutput","path":"'+output.toString()+'"}' Global.session = Mock(Session) { getConfig()>>config } and: @@ -285,7 +285,7 @@ class LinFileSystemProviderTest extends Specification { meta.resolve('12345/output1').mkdirs() meta.resolve('12345/output2').mkdirs() meta.resolve('12345/.data.json').text = '{"type":"TaskRun"}' - meta.resolve('12345/output1/.data.json').text = '{"type":"DataOutput", "path": "' + output1.toString() + '"}' + meta.resolve('12345/output1/.data.json').text = '{"type":"FileOutput", "path": "' + output1.toString() + '"}' and: def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] @@ -400,7 +400,7 @@ class LinFileSystemProviderTest extends Specification { output.resolve('abc').text = 'file1' output.resolve('.foo').text = 'file2' meta.resolve('12345/output').mkdirs() - meta.resolve('12345/output/.data.json').text = '{"type":"DataOutput", "path": "' + output.toString() + '"}' + meta.resolve('12345/output/.data.json').text = '{"type":"FileOutput", "path": "' + output.toString() + '"}' and: def provider = new LinFileSystemProvider() def lid1 = provider.getPath(LinPath.asUri('lid://12345/output/abc')) @@ -420,7 +420,7 @@ class LinFileSystemProviderTest extends Specification { def file = data.resolve('abc') file.text = 'Hello' meta.resolve('12345/abc').mkdirs() - meta.resolve('12345/abc/.data.json').text = '{"type":"DataOutput", "path": "' + file.toString() + '"}' + meta.resolve('12345/abc/.data.json').text = '{"type":"FileOutput", "path": "' + file.toString() + '"}' Global.session = Mock(Session) { getConfig()>>config } and: def provider = new LinFileSystemProvider() diff --git a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy index f3a33b51c9..92b48e16cc 100644 --- 
a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy @@ -20,8 +20,8 @@ import nextflow.lineage.LinUtils import nextflow.lineage.model.Checksum import nextflow.lineage.model.Parameter import nextflow.lineage.model.Workflow -import nextflow.lineage.model.WorkflowOutputs -import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.WorkflowOutput +import nextflow.lineage.model.FileOutput import nextflow.lineage.model.WorkflowRun import nextflow.lineage.serde.LinEncoder import nextflow.util.CacheHelper @@ -145,10 +145,10 @@ class LinPathTest extends Specification { meta.resolve('12345/output1').mkdirs() meta.resolve('12345/path/to/file2.txt').mkdirs() meta.resolve('12345/.data.json').text = '{"type":"TaskRun"}' - meta.resolve('12345/output1/.data.json').text = '{"type":"DataOutput", "path": "' + outputFolder.toString() + '"}' - meta.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"DataOutput", "path": "' + outputFile.toString() + '"}' + meta.resolve('12345/output1/.data.json').text = '{"type":"FileOutput", "path": "' + outputFolder.toString() + '"}' + meta.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"FileOutput", "path": "' + outputFile.toString() + '"}' def time = OffsetDateTime.now() - def wfResultsMetadata = new LinEncoder().withPrettyPrint(true).encode(new WorkflowOutputs(time, "lid://1234", [new Parameter( "Path", "a", "lid://1234/a.txt")])) + def wfResultsMetadata = new LinEncoder().withPrettyPrint(true).encode(new WorkflowOutput(time, "lid://1234", [new Parameter( "Path", "a", "lid://1234/a.txt")])) meta.resolve('5678/').mkdirs() meta.resolve('5678/.data.json').text = wfResultsMetadata @@ -195,7 +195,7 @@ class LinPathTest extends Specification { result.text == wfResultsMetadata when: 'Lid description subobject' - def result2 = new LinPath(lidFs, '5678#outputs').getTargetOrMetadataPath() + def result2 = new LinPath(lidFs, 
'5678#output').getTargetOrMetadataPath() then: result2 instanceof LinMetadataPath result2.text == LinUtils.encodeSearchOutputs([new Parameter("Path","a", "lid://1234/a.txt")], true) @@ -218,9 +218,9 @@ class LinPathTest extends Specification { p.text == '"repo"' when: 'outputs' - def outputs = new WorkflowOutputs(OffsetDateTime.now(), "lid://123456", [ new Parameter("Collection", "samples", ["sample1", "sample2"])]) - lidFs.store.save("123456/outputs", outputs) - Path p2 = LinPath.getMetadataAsTargetPath(wf, lidFs, "123456", ["outputs"] as String[]) + def outputs = new WorkflowOutput(OffsetDateTime.now(), "lid://123456", [new Parameter("Collection", "samples", ["sample1", "sample2"])]) + lidFs.store.save("123456/output", outputs) + Path p2 = LinPath.getMetadataAsTargetPath(wf, lidFs, "123456", ["output"] as String[]) then: p2 instanceof LinMetadataPath p2.text == LinUtils.encodeSearchOutputs([new Parameter("Collection", "samples", ["sample1", "sample2"])], true) @@ -232,10 +232,10 @@ class LinPathTest extends Specification { exception.message == "Target path '123456#no-exist' does not exist" when: 'outputs does not exists' - LinPath.getMetadataAsTargetPath(wf, lidFs, "6789", ["outputs"] as String[]) + LinPath.getMetadataAsTargetPath(wf, lidFs, "6789", ["output"] as String[]) then: def exception1 = thrown(FileNotFoundException) - exception1.message == "Target path '6789#outputs' does not exist" + exception1.message == "Target path '6789#output' does not exist" when: 'null object' LinPath.getMetadataAsTargetPath(null, lidFs, "123456", ["no-exist"] as String[]) @@ -602,7 +602,7 @@ class LinPathTest extends Specification { def file = wdir.resolve("file.txt") file.text = "this is a data file" def hash = CacheHelper.hasher(file).hash().toString() - def correctData = new DataOutput(file.toString(), new Checksum(hash,"nextflow", "standard")) + def correctData = new FileOutput(file.toString(), new Checksum(hash,"nextflow", "standard")) 
LinPath.validateDataOutput(correctData) def stdout = capture .toString() @@ -623,7 +623,7 @@ class LinPathTest extends Specification { def file = wdir.resolve("file.txt") file.text = "this is a data file" def hash = CacheHelper.hasher(file).hash().toString() - def correctData = new DataOutput(file.toString(), new Checksum("abscd","nextflow", "standard")) + def correctData = new FileOutput(file.toString(), new Checksum("abscd","nextflow", "standard")) LinPath.validateDataOutput(correctData) def stdout = capture .toString() @@ -645,7 +645,7 @@ class LinPathTest extends Specification { def file = wdir.resolve("file.txt") file.text = "this is a data file" def hash = CacheHelper.hasher(file).hash().toString() - def correctData = new DataOutput(file.toString(), new Checksum(hash,"not-supported", "standard")) + def correctData = new FileOutput(file.toString(), new Checksum(hash,"not-supported", "standard")) LinPath.validateDataOutput(correctData) def stdout = capture .toString() @@ -664,7 +664,7 @@ class LinPathTest extends Specification { def 'should throw exception when file not found validating hash'(){ when: - def correctData = new DataOutput("not/existing/file", new Checksum("120741","nextflow", "standard")) + def correctData = new FileOutput("not/existing/file", new Checksum("120741","nextflow", "standard")) LinPath.validateDataOutput(correctData) then: diff --git a/modules/nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy index 89bd06a7a4..e747151b52 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy @@ -19,11 +19,11 @@ package nextflow.lineage.serde import nextflow.lineage.model.Checksum import nextflow.lineage.model.DataPath import nextflow.lineage.model.Parameter -import nextflow.lineage.model.DataOutput -import nextflow.lineage.model.TaskOutputs +import 
nextflow.lineage.model.FileOutput +import nextflow.lineage.model.TaskOutput import nextflow.lineage.model.TaskRun import nextflow.lineage.model.Workflow -import nextflow.lineage.model.WorkflowOutputs +import nextflow.lineage.model.WorkflowOutput import nextflow.lineage.model.WorkflowRun import spock.lang.Specification @@ -35,7 +35,7 @@ class LinEncoderTest extends Specification{ given: def encoder = new LinEncoder() and: - def output = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), + def output = new FileOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid://task", 1234) when: @@ -43,8 +43,8 @@ class LinEncoderTest extends Specification{ def object = encoder.decode(encoded) then: - object instanceof DataOutput - def result = object as DataOutput + object instanceof FileOutput + def result = object as FileOutput result.path == "/path/to/file" result.checksum instanceof Checksum result.checksum.value == "hash_value" @@ -88,17 +88,17 @@ class LinEncoderTest extends Specification{ def encoder = new LinEncoder() and: def time = OffsetDateTime.now() - def wfResults = new WorkflowOutputs(time, "lid://1234", [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")]) + def wfResults = new WorkflowOutput(time, "lid://1234", [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")]) when: def encoded = encoder.encode(wfResults) def object = encoder.decode(encoded) then: - object instanceof WorkflowOutputs - def result = object as WorkflowOutputs + object instanceof WorkflowOutput + def result = object as WorkflowOutput result.createdAt == time result.workflowRun == "lid://1234" - result.outputs == [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")] + result.output == [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")] } def 'should encode and decode TaskRun'() { @@ -107,7 +107,7 @@ class LinEncoderTest 
extends Specification{ and: def uniqueId = UUID.randomUUID() def taskRun = new TaskRun( - uniqueId.toString(),"name", new Checksum("78910", "nextflow", "standard"), new Checksum("74517", "nextflow", "standard"), + uniqueId.toString(),"name", new Checksum("78910", "nextflow", "standard"), 'this is a script', [new Parameter("String", "param1", "value1")], "container:version", "conda", "spack", "amd64", [a: "A", b: "B"], [new DataPath("path/to/file", new Checksum("78910", "nextflow", "standard"))] ) @@ -120,9 +120,9 @@ class LinEncoderTest extends Specification{ result.sessionId == uniqueId.toString() result.name == "name" result.codeChecksum.value == "78910" - result.scriptChecksum.value == "74517" - result.inputs.size() == 1 - result.inputs.get(0).name == "param1" + result.script == "this is a script" + result.input.size() == 1 + result.input.get(0).name == "param1" result.container == "container:version" result.conda == "conda" result.spack == "spack" @@ -139,32 +139,32 @@ class LinEncoderTest extends Specification{ and: def time = OffsetDateTime.now() def parameter = new Parameter("a","b", "c") - def wfResults = new TaskOutputs("lid://1234", "lid://5678", time, [parameter], null) + def wfResults = new TaskOutput("lid://1234", "lid://5678", time, [parameter], null) when: def encoded = encoder.encode(wfResults) def object = encoder.decode(encoded) then: - object instanceof TaskOutputs - def result = object as TaskOutputs + object instanceof TaskOutput + def result = object as TaskOutput result.createdAt == time result.taskRun == "lid://1234" result.workflowRun == "lid://5678" - result.outputs.size() == 1 - result.outputs[0] == parameter + result.output.size() == 1 + result.output[0] == parameter } def 'object with null date attributes' () { given: def encoder = new LinEncoder() and: - def wfResults = new WorkflowOutputs(null, "lid://1234") + def wfResults = new WorkflowOutput(null, "lid://1234") when: def encoded = encoder.encode(wfResults) def object = 
encoder.decode(encoded) then: - encoded == '{"type":"WorkflowOutputs","createdAt":null,"workflowRun":"lid://1234","outputs":null,"annotations":null}' - def result = object as WorkflowOutputs + encoded == '{"type":"WorkflowOutput","createdAt":null,"workflowRun":"lid://1234","output":null,"annotations":null}' + def result = object as WorkflowOutput result.createdAt == null } diff --git a/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy b/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy index 903e92837b..9ccc147bb0 100644 --- a/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy +++ b/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy @@ -20,7 +20,7 @@ package nextflow.lineage.h2 import nextflow.lineage.model.Annotation import nextflow.lineage.model.Checksum import nextflow.lineage.model.DataPath -import nextflow.lineage.model.DataOutput +import nextflow.lineage.model.FileOutput import nextflow.lineage.model.Parameter import nextflow.lineage.model.Workflow import nextflow.lineage.model.WorkflowRun @@ -51,7 +51,7 @@ class H2LinStoreTest extends Specification { def 'should store and get a value' () { given: - def value = new DataOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid//task", 1234) + def value = new FileOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid//task", 1234) when: store.save('/some/key', value) then: @@ -67,18 +67,18 @@ class H2LinStoreTest extends Specification { def key = "testKey" def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) def key2 = "testKey2" - def value2 = new DataOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new 
Annotation("key1", "value1"), new Annotation("key2", "value2")]) + def value2 = new FileOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key1", "value1"), new Annotation("key2", "value2")]) def key3 = "testKey3" - def value3 = new DataOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key2", "value2"), new Annotation("key3", "value3")]) + def value3 = new FileOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key2", "value2"), new Annotation("key3", "value3")]) def key4 = "testKey4" - def value4 = new DataOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key3", "value3"), new Annotation("key4", "value4")]) + def value4 = new FileOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key3", "value3"), new Annotation("key4", "value4")]) store.save(key, value1) store.save(key2, value2) store.save(key3, value3) store.save(key4, value4) when: - def results = store.search("type=DataOutput&annotations.key=key2&annotations.value=value2") + def results = store.search("type=FileOutput&annotations.key=key2&annotations.value=value2") then: results.size() == 2 } From 034665c8591a5ecf0d1a2756bb7824ec0629749b Mon Sep 17 00:00:00 2001 From: jorgee Date: Tue, 22 Apr 2025 15:54:32 +0200 Subject: [PATCH 67/72] fix tests Signed-off-by: jorgee --- .../test/groovy/nextflow/cli/CmdLineageTest.groovy | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy 
b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy index a7172abf9f..9efdc0c3dd 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy @@ -77,7 +77,7 @@ class CmdLineageTest extends Specification { lidLog.write("run_name", uniqueId, "lid://123456", date) def recordEntry = "${LinHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tlid://123456".toString() when: - def lidCmd = new CmdLineage(launcher: launcher, args: ["log"]) + def lidCmd = new CmdLineage(launcher: launcher, args: ["list"]) lidCmd.run() def stdout = capture .toString() @@ -105,7 +105,7 @@ class CmdLineageTest extends Specification { getOptions() >> new CliOptions(config: [configFile.toString()]) } when: - def lidCmd = new CmdLineage(launcher: launcher, args: ["log"]) + def lidCmd = new CmdLineage(launcher: launcher, args: ["list"]) lidCmd.run() def stdout = capture .toString() @@ -214,7 +214,7 @@ class CmdLineageTest extends Specification { lidFile2.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "foo", new Checksum("abcde2345","nextflow","standard"), - new Checksum("abfsc2375","nextflow","standard"), + 'this is a script', [new Parameter( "val", "sample_id","ggal_gut"), new Parameter("path","reads",["lid://45678/output.txt"])], null, null, null, null, [:],[], null) @@ -224,7 +224,7 @@ class CmdLineageTest extends Specification { lidFile4.text = encoder.encode(entry) entry = new TaskRun("u345-2346-1stw2", "bar", new Checksum("abfs2556","nextflow","standard"), - new Checksum("abfsc2375","nextflow","standard"), + 'this is a script', null,null, null, null, null, [:],[], null) lidFile5.text = encoder.encode(entry) final network = """flowchart BT @@ -283,7 +283,7 @@ class CmdLineageTest extends Specification { def expectedOutput = jsonSer lidFile.text = jsonSer when: - def lidCmd = new CmdLineage(launcher: launcher, args: ["view", 
"lid:///?type=DataOutput"]) + def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid:///?type=FileOutput"]) lidCmd.run() def stdout = capture .toString() @@ -318,7 +318,7 @@ class CmdLineageTest extends Specification { def expectedOutput = jsonSer lidFile.text = jsonSer when: - def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid:///?type=DataOutput"]) + def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid:///?type=FileOutput"]) lidCmd.run() def stdout = capture .toString() From 119b1d91b2a4f64cb80ecce9730c387b25aeb696 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Tue, 22 Apr 2025 21:23:56 +0200 Subject: [PATCH 68/72] Minor changes [ci fast] Signed-off-by: Paolo Di Tommaso --- .../lineage/cli/LinCommandImpl.groovy | 3 +-- .../nextflow/lineage/fs/LinFileSystem.groovy | 6 +++++- .../lineage/fs/LinFileSystemProvider.groovy | 21 +++++++++---------- .../main/nextflow/lineage/fs/LinPath.groovy | 4 ++-- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy index 46202a9d51..43290e46ff 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy @@ -41,7 +41,6 @@ import org.eclipse.jgit.diff.DiffAlgorithm import org.eclipse.jgit.diff.DiffFormatter import org.eclipse.jgit.diff.RawText import org.eclipse.jgit.diff.RawTextComparator - /** * Implements lineage command line operations * @@ -50,7 +49,7 @@ import org.eclipse.jgit.diff.RawTextComparator @CompileStatic class LinCommandImpl implements CmdLineage.LinCommand { - private static Path DEFAULT_HTML_FILE = Path.of("lineage.html") + private static final Path DEFAULT_HTML_FILE = Path.of("lineage.html") @Canonical static class Edge { diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy 
b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy index 5802e2351b..aa8fdaeb69 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystem.groovy @@ -16,6 +16,10 @@ package nextflow.lineage.fs +import com.google.common.collect.ImmutableSet +import nextflow.lineage.LinStore +import nextflow.lineage.LinStoreFactory + import java.nio.file.FileStore import java.nio.file.FileSystem import java.nio.file.Path @@ -104,7 +108,7 @@ class LinFileSystem extends FileSystem { @Override Set supportedFileAttributeViews() { - return null + return ImmutableSet.of("basic") } @Override diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy index 555a227d73..94b7f02064 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy @@ -38,7 +38,6 @@ import java.nio.file.spi.FileSystemProvider import groovy.transform.CompileStatic import nextflow.lineage.config.LineageConfig - /** * File System Provider for LID Paths * @@ -73,17 +72,17 @@ class LinFileSystemProvider extends FileSystemProvider { @Override synchronized FileSystem newFileSystem(URI uri, Map config) throws IOException { checkScheme(uri) - if (!fileSystem) { - //Overwrite default values with provided configuration - final defaultConfig = LineageConfig.asMap() - if (config) { - for (Map.Entry e : config.entrySet()) { - defaultConfig.put(e.key, e.value) - } + if (fileSystem) { + return fileSystem + } + //Overwrite default values with provided configuration + final defaultConfig = LineageConfig.asMap() + if (config) { + for (Map.Entry e : config.entrySet()) { + defaultConfig.put(e.key, e.value) } - fileSystem = new LinFileSystem(this, new LineageConfig(defaultConfig)) } - return fileSystem + return 
fileSystem = new LinFileSystem(this, new LineageConfig(defaultConfig)) } @Override @@ -353,7 +352,7 @@ class LinFileSystemProvider extends FileSystemProvider { final lid = toLinPath(path) if (lid instanceof LinMetadataPath) return (lid as LinMetadataPath).readAttributes(type) - readAttributes0(lid, type, options) + return readAttributes0(lid, type, options) } private A readAttributes0(LinPath lid, Class type, LinkOption... options) throws IOException { diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy index 0294f1501e..8d0559f55a 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy @@ -151,9 +151,9 @@ class LinPath implements Path, LogicalDataPath { */ protected static Path findTarget(LinFileSystem fs, String filePath, boolean resultsAsPath, String[] children=[]) throws Exception { if( !fs ) - throw new IllegalArgumentException("Cannot get target path for a relative LinPath") + throw new IllegalArgumentException("Cannot get target path for a relative lineage path") if( filePath.isEmpty() || filePath == SEPARATOR ) - throw new IllegalArgumentException("Cannot get target path for an empty LinPath") + throw new IllegalArgumentException("Cannot get target path for an empty lineage path") final store = fs.getStore() if( !store ) throw new Exception("Lineage store not found - Check Nextflow configuration") From d18674238eec026a44bd39c4b158c1ddd4d0cc24 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Tue, 22 Apr 2025 21:29:04 +0200 Subject: [PATCH 69/72] [ci fast] merge master Signed-off-by: Paolo Di Tommaso From 303807ba18898fdc66b273d107c7e5ee6b744536 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Tue, 22 Apr 2025 22:40:12 +0200 Subject: [PATCH 70/72] Remove h2 plugin [ci fast] Signed-off-by: Paolo Di Tommaso --- plugins/nf-lineage-h2/build.gradle | 49 ----- 
.../lineage/h2/H2LinHistoryLog.groovy | 112 ----------- .../nextflow/lineage/h2/H2LinPlugin.groovy | 35 ---- .../nextflow/lineage/h2/H2LinStore.groovy | 178 ------------------ .../lineage/h2/H2LinStoreFactory.groovy | 41 ---- .../src/resources/META-INF/MANIFEST.MF | 6 - .../src/resources/META-INF/extensions.idx | 18 -- .../lineage/h2/H2LinHistoryLogTest.groovy | 117 ------------ .../nextflow/lineage/h2/H2LinStoreTest.groovy | 85 --------- settings.gradle | 1 - 10 files changed, 642 deletions(-) delete mode 100644 plugins/nf-lineage-h2/build.gradle delete mode 100644 plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinHistoryLog.groovy delete mode 100644 plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinPlugin.groovy delete mode 100644 plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy delete mode 100644 plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStoreFactory.groovy delete mode 100644 plugins/nf-lineage-h2/src/resources/META-INF/MANIFEST.MF delete mode 100644 plugins/nf-lineage-h2/src/resources/META-INF/extensions.idx delete mode 100644 plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinHistoryLogTest.groovy delete mode 100644 plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy diff --git a/plugins/nf-lineage-h2/build.gradle b/plugins/nf-lineage-h2/build.gradle deleted file mode 100644 index 8d38fe1425..0000000000 --- a/plugins/nf-lineage-h2/build.gradle +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2013-2025, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -apply plugin: 'java' -apply plugin: 'java-test-fixtures' -apply plugin: 'idea' -apply plugin: 'groovy' - -sourceSets { - main.java.srcDirs = [] - main.groovy.srcDirs = ['src/main'] - main.resources.srcDirs = ['src/resources'] - test.groovy.srcDirs = ['src/test'] - test.java.srcDirs = [] - test.resources.srcDirs = [] -} - -configurations { - // see https://docs.gradle.org/4.1/userguide/dependency_management.html#sub:exclude_transitive_dependencies - runtimeClasspath.exclude group: 'org.slf4j', module: 'slf4j-api' -} - -dependencies { - compileOnly project(':nextflow') - compileOnly project(':nf-lineage') - compileOnly 'org.slf4j:slf4j-api:2.0.16' - compileOnly 'org.pf4j:pf4j:3.12.0' - - api("com.h2database:h2:2.2.224") - api("com.zaxxer:HikariCP:5.0.1") - api("org.apache.groovy:groovy-sql:4.0.26") { transitive=false } - - testImplementation(project(':nf-lineage')) - testImplementation(testFixtures(project(":nextflow"))) -} diff --git a/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinHistoryLog.groovy b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinHistoryLog.groovy deleted file mode 100644 index 3cb0c91c34..0000000000 --- a/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinHistoryLog.groovy +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright 2013-2025, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -package nextflow.lineage.h2 - -import java.sql.Timestamp - -import com.zaxxer.hikari.HikariDataSource -import groovy.sql.GroovyRowResult -import groovy.sql.Sql -import groovy.transform.CompileStatic -import groovy.util.logging.Slf4j -import nextflow.lineage.LinHistoryLog -import nextflow.lineage.LinHistoryRecord - -/** - * Implement a {@link LinHistoryLog} based on H2 database - * - * @author Paolo Di Tommaso - */ -@Slf4j -@CompileStatic -class H2LinHistoryLog implements LinHistoryLog { - - private HikariDataSource dataSource - - H2LinHistoryLog(HikariDataSource dataSource) { - this.dataSource = dataSource - } - - @Override - void write(String name, UUID sessionId, String runCid) { - try(final sql=new Sql(dataSource)) { - def query = """ - INSERT INTO lid_history_record (timestamp, run_name, session_id, run_lid) - VALUES (?, ?, ?, ?) - """ - def timestamp = new Timestamp(System.currentTimeMillis()) // Current timestamp - sql.executeInsert(query, List.of(timestamp, name, sessionId.toString(), runCid)) - } - } - - @Override - void updateRunLid(UUID sessionId, String runLid) { - try(final sql=new Sql(dataSource)) { - def query = """ - UPDATE lid_history_record - SET run_lid = ? - WHERE session_id = ? 
- """ - - final count = sql.executeUpdate(query, List.of(runLid, sessionId.toString())) - if (count > 0) { - log.debug "Successfully updated run_lid for session_id: $sessionId" - } - else { - log.warn "No record found with session_id: $sessionId" - } - } - } - - @Override - List getRecords() { - try(final sql=new Sql(dataSource)) { - final result = new ArrayList(100) - final query = "SELECT * FROM lid_history_record " - final rows = sql.rows(query) - for( GroovyRowResult row : rows ) { - result.add( - new LinHistoryRecord( - row.timestamp as Date, - row.run_name as String, - UUID.fromString(row.session_id as String), - row.run_lid as String, - ) - ) - } - return result - } - } - - @Override - LinHistoryRecord getRecord(UUID sessionId) { - try(final sql=new Sql(dataSource)) { - final query = "SELECT * FROM lid_history_record WHERE session_id = ?" - final row = sql.firstRow(query, sessionId.toString()) // Convert UUID to String for query - if( !row ) - return null - return new LinHistoryRecord( - row.timestamp as Date, - row.run_name as String, - UUID.fromString(row.session_id as String), - row.run_lid as String, - ) - } - } - -} diff --git a/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinPlugin.groovy b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinPlugin.groovy deleted file mode 100644 index 207f9a2d44..0000000000 --- a/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinPlugin.groovy +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2013-2025, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package nextflow.lineage.h2 - -import groovy.transform.CompileStatic -import nextflow.plugin.BasePlugin -import org.pf4j.PluginWrapper - -/** - * Implements plugin entry class for H2 db based CID store - * - * @author Paolo Di Tommaso - */ -@CompileStatic -class H2LinPlugin extends BasePlugin{ - - H2LinPlugin(PluginWrapper wrapper) { - super(wrapper) - } -} diff --git a/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy deleted file mode 100644 index c109cc3931..0000000000 --- a/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStore.groovy +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright 2013-2025, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -package nextflow.lineage.h2 - -import java.sql.Clob - -import com.zaxxer.hikari.HikariDataSource -import groovy.json.JsonSlurper -import groovy.sql.Sql -import groovy.transform.CompileStatic -import groovy.util.logging.Slf4j -import nextflow.lineage.LinHistoryLog -import nextflow.lineage.LinStore -import nextflow.lineage.LinUtils -import nextflow.lineage.serde.LinEncoder -import nextflow.lineage.serde.LinSerializable -import nextflow.lineage.config.LineageConfig -import nextflow.lineage.config.LineageStoreOpts -import nextflow.util.TestOnly -/** - * - * @author Paolo Di Tommaso - */ -@Slf4j -@CompileStatic -class H2LinStore implements LinStore { - - private HikariDataSource dataSource - private LinEncoder encoder - - @Override - H2LinStore open(LineageConfig config) { - assert config.store.location.startsWith('jdbc:h2:') - log.info "Connecting CID H2 store: '${config.store.location}'" - encoder = new LinEncoder() - dataSource = createDataSource(config.store) - // create the db tables - createDbTables(dataSource) - createAlias(dataSource) - return this - } - - static HikariDataSource createDataSource(LineageStoreOpts store) { - final result = new HikariDataSource() - result.jdbcUrl = store.location - result.driverClassName = 'org.h2.Driver' - result.username = 'sa' - result.password = '' - result.maximumPoolSize = 10 - return result - } - - static void createDbTables(HikariDataSource dataSource) { - // create DDL is missing - try(final sql=new Sql(dataSource)) { - sql.execute(''' - CREATE TABLE IF NOT EXISTS lid_file ( - id BIGINT AUTO_INCREMENT PRIMARY KEY, - path VARCHAR UNIQUE NOT NULL, - metadata CLOB NOT NULL - ); - - CREATE TABLE IF NOT EXISTS lid_file_tag ( - file_id BIGINT NOT NULL, - tags TEXT NOT NULL, - PRIMARY KEY (file_id), - FOREIGN KEY (file_id) REFERENCES lid_file(id) ON DELETE CASCADE - ); - - CREATE TABLE IF NOT EXISTS lid_history_record ( - id IDENTITY PRIMARY KEY, -- Auto-increment primary key - timestamp TIMESTAMP NOT NULL, - 
run_name VARCHAR(255) NOT NULL, - session_id UUID NOT NULL, - run_lid VARCHAR(255) NOT NULL, - UNIQUE (run_name, session_id) -- Enforce uniqueness constraint - ); - ''') - } - } - - static void createAlias(HikariDataSource dataSource){ - try(final sql=new Sql(dataSource)) { - sql.execute(""" - CREATE ALIAS IF NOT EXISTS JSON_MATCH FOR "nextflow.lineage.h2.H2LinStore.matchesJsonQuery" - """) - } - } - - @Override - void save(String key, LinSerializable object) { - final value = encoder.encode(object) - try(final sql=new Sql(dataSource)) { - sql.execute(""" - INSERT INTO lid_file (path, metadata) VALUES (?, ?) - """, [key, (Object)value]) - } - } - - @Override - LinSerializable load(String key) { - try(final sql=new Sql(dataSource)) { - final result = sql.firstRow("SELECT metadata FROM lid_file WHERE path = ?", List.of(key)) - return result ? encoder.decode(toValue(result.metadata).toString()) : null - } - } - - protected Object toValue(Object obj) { - return obj instanceof Clob - ? obj.characterStream.text - : obj - } - - @Override - LinHistoryLog getHistoryLog() { - return new H2LinHistoryLog(dataSource) - } - - @Override - Map search(String queryString) { - final results= new HashMap() - try(final sql=new Sql(dataSource)) { - sql.eachRow("SELECT path, metadata FROM lid_file WHERE JSON_MATCH(metadata, ?)", List.of(queryString)) { row -> - results.put(row['path'] as String, encoder.decode(toValue(row['metadata']) as String)) - } - } - return results - } - - /** - * JSON_MATCH implementation for h2 - * @param jsonString - * @param queryString - * @return - */ - static boolean matchesJsonQuery(String jsonString, String queryString) { - def json = new JsonSlurper().parseText(jsonString) - def conditions = LinUtils.parseQuery(queryString) - return LinUtils.checkParams(json, conditions) - } - - @Override - void close() { - dataSource.close() - } - - @TestOnly - void truncateAllTables() { - try(final sql=new Sql(dataSource)) { - println "Truncating all tables..." 
- sql.execute("SET REFERENTIAL_INTEGRITY FALSE") // Disable foreign key constraints - - def tables = sql.rows("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = 'PUBLIC'") - tables.each { table -> - final stm = "TRUNCATE TABLE ${table.TABLE_NAME}" as String - sql.execute(stm) // Truncate each table - } - - sql.execute("SET REFERENTIAL_INTEGRITY TRUE") // Re-enable constraints - println "All tables truncated successfully" - } - } -} diff --git a/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStoreFactory.groovy b/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStoreFactory.groovy deleted file mode 100644 index 466c4579df..0000000000 --- a/plugins/nf-lineage-h2/src/main/nextflow/lineage/h2/H2LinStoreFactory.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2013-2025, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -package nextflow.lineage.h2 - -import groovy.transform.CompileStatic -import groovy.util.logging.Slf4j -import nextflow.lineage.LinStore -import nextflow.lineage.LinStoreFactory -import nextflow.lineage.config.LineageConfig -import nextflow.plugin.Priority - -@Slf4j -@CompileStatic -@Priority(-10) // <-- lower is higher, this is needed to override default provider behavior -class H2LinStoreFactory extends LinStoreFactory { - - @Override - boolean canOpen(LineageConfig config) { - return config.store.location.startsWith('jdbc:h2:') - } - - @Override - protected LinStore newInstance(LineageConfig config) { - return new H2LinStore().open(config) - } -} diff --git a/plugins/nf-lineage-h2/src/resources/META-INF/MANIFEST.MF b/plugins/nf-lineage-h2/src/resources/META-INF/MANIFEST.MF deleted file mode 100644 index 012e959388..0000000000 --- a/plugins/nf-lineage-h2/src/resources/META-INF/MANIFEST.MF +++ /dev/null @@ -1,6 +0,0 @@ -Manifest-Version: 1.0 -Plugin-Class: nextflow.lineage.h2.H2LinPlugin -Plugin-Id: nf-lineage-h2 -Plugin-Version: 0.1.0 -Plugin-Provider: Seqera Labs -Plugin-Requires: >=25.01.0-edge diff --git a/plugins/nf-lineage-h2/src/resources/META-INF/extensions.idx b/plugins/nf-lineage-h2/src/resources/META-INF/extensions.idx deleted file mode 100644 index 16c6f06f0c..0000000000 --- a/plugins/nf-lineage-h2/src/resources/META-INF/extensions.idx +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright 2013-2025, Seqera Labs -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -nextflow.lineage.h2.H2LinPlugin -nextflow.lineage.h2.H2LinStoreFactory diff --git a/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinHistoryLogTest.groovy b/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinHistoryLogTest.groovy deleted file mode 100644 index 9a597da176..0000000000 --- a/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinHistoryLogTest.groovy +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2013-2025, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -package nextflow.lineage.h2 - -import nextflow.lineage.config.LineageConfig -import spock.lang.Shared -import spock.lang.Specification - -/** - * - * @author Paolo Di Tommaso - */ -class H2LinHistoryLogTest extends Specification { - - @Shared - H2LinStore store - - def setupSpec() { - def uri = "jdbc:h2:mem:testdb;DB_CLOSE_DELAY=-1" - def config = new LineageConfig([store:[location:uri]]) - store = new H2LinStore().open(config) - } - - def cleanupSpec() { - store.close() - } - - def cleanup() { - store.truncateAllTables() - } - - def 'should write lid record' () { - given: - def log = store.getHistoryLog() - def uuid = UUID.randomUUID() - when: - log.write('foo', uuid, '1234') - then: - noExceptionThrown() - - when: - def rec = log.getRecord(uuid) - then: - rec.runName == 'foo' - rec.sessionId == uuid - rec.runLid == '1234' - } - - def 'should update run lid' () { - given: - def log = store.getHistoryLog() - def uuid = UUID.randomUUID() - when: - log.write('foo', uuid, '1234') - then: - noExceptionThrown() - - when: - log.updateRunLid(uuid, '4444') - then: - noExceptionThrown() - - when: - def rec = log.getRecord(uuid) - then: - rec.runName == 'foo' - rec.sessionId == uuid - rec.runLid == '4444' - } - - def 'should update get records' () { - given: - def log = store.getHistoryLog() - def uuid1 = UUID.randomUUID() - def uuid2 = UUID.randomUUID() - def uuid3 = UUID.randomUUID() - when: - log.write('foo1', uuid1, '1') - log.write('foo2', uuid2, '2') - log.write('foo3', uuid3, '3') - then: - noExceptionThrown() - - when: - def all = log.getRecords() - then: - all.size()==3 - and: - all[0].runName == 'foo1' - all[0].sessionId == uuid1 - all[0].runLid == '1' - and: - all[1].runName == 'foo2' - all[1].sessionId == uuid2 - all[1].runLid == '2' - and: - all[2].runName == 'foo3' - all[2].sessionId == uuid3 - all[2].runLid == '3' - } - -} diff --git a/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy 
b/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy deleted file mode 100644 index 9ccc147bb0..0000000000 --- a/plugins/nf-lineage-h2/src/test/nextflow/lineage/h2/H2LinStoreTest.groovy +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright 2013-2025, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package nextflow.lineage.h2 - -import nextflow.lineage.model.Annotation -import nextflow.lineage.model.Checksum -import nextflow.lineage.model.DataPath -import nextflow.lineage.model.FileOutput -import nextflow.lineage.model.Parameter -import nextflow.lineage.model.Workflow -import nextflow.lineage.model.WorkflowRun -import nextflow.lineage.config.LineageConfig -import spock.lang.Shared -import spock.lang.Specification - -import java.time.OffsetDateTime - -/** - * - * @author Paolo Di Tommaso - */ -class H2LinStoreTest extends Specification { - - @Shared - H2LinStore store - - def setupSpec() { - def uri = "jdbc:h2:mem:testdb;DB_CLOSE_DELAY=-1" - def config = new LineageConfig([store:[location:uri]]) - store = new H2LinStore().open(config) - } - - def cleanupSpec() { - store.close() - } - - def 'should store and get a value' () { - given: - def value = new FileOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid//task", 1234) - when: - store.save('/some/key', value) - then: - store.load('/some/key').toString() == value.toString() - } - - def 'should query' 
() { - given: - def uniqueId = UUID.randomUUID() - def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard")) - def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456") - def time = OffsetDateTime.now() - def key = "testKey" - def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")]) - def key2 = "testKey2" - def value2 = new FileOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key1", "value1"), new Annotation("key2", "value2")]) - def key3 = "testKey3" - def value3 = new FileOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key2", "value2"), new Annotation("key3", "value3")]) - def key4 = "testKey4" - def value4 = new FileOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "lid://workflow", "lid//task", 1234, time, time, [new Annotation("key3", "value3"), new Annotation("key4", "value4")]) - - store.save(key, value1) - store.save(key2, value2) - store.save(key3, value3) - store.save(key4, value4) - when: - def results = store.search("type=FileOutput&annotations.key=key2&annotations.value=value2") - then: - results.size() == 2 - } -} diff --git a/settings.gradle b/settings.gradle index 4d5abccb4d..53d56ba13b 100644 --- a/settings.gradle +++ b/settings.gradle @@ -43,4 +43,3 @@ include 'plugins:nf-codecommit' include 'plugins:nf-wave' include 'plugins:nf-cloudcache' include 'plugins:nf-k8s' -include 'plugins:nf-lineage-h2' From 83f564798ede9cdc250b9729ac51783b13debb34 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 23 Apr 2025 09:49:24 +0200 Subject: [PATCH 71/72] Simplified config [ci fast] Signed-off-by: Paolo Di Tommaso --- 
.../main/groovy/nextflow/cli/CmdRun.groovy | 2 +- .../lineage/config/LineageConfig.groovy | 4 +-- .../nextflow/lineage/LinObserverTest.groovy | 6 ++-- .../lineage/cli/LinCommandImplTest.groovy | 2 +- .../fs/LinFileSystemProviderTest.groovy | 28 +++++++++---------- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy index 19234d03e1..84488c624a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy @@ -354,7 +354,7 @@ class CmdRun extends CmdBase implements HubOptions { runner.session.disableJobsCancellation = getDisableJobsCancellation() final isTowerEnabled = config.navigate('tower.enabled') as Boolean - final isDataEnabled = config.navigate("workflow.lineage.enabled") as Boolean + final isDataEnabled = config.navigate("lineage.enabled") as Boolean if( isTowerEnabled || isDataEnabled || log.isTraceEnabled() ) runner.session.resolvedConfig = ConfigBuilder.resolveConfig(scriptFile.parent, this) // note config files are collected during the build process diff --git a/modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy b/modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy index 73a86acbce..798805e5d8 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy @@ -38,12 +38,12 @@ class LineageConfig { } static Map asMap() { - session?.config?.navigate('workflow.lineage') as Map ?: new HashMap() + session?.config?.navigate('lineage') as Map ?: new HashMap() } static LineageConfig create(Session session) { if( session ) { - return new LineageConfig( session.config.navigate('workflow.lineage') as Map ?: Map.of()) + return new LineageConfig( session.config.navigate('lineage') as Map ?: Map.of()) } else throw new 
IllegalStateException("Missing Nextflow session") diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy index 04610fff2b..349c36889a 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy @@ -157,7 +157,7 @@ class LinObserverTest extends Specification { def 'should save workflow' (){ given: def folder = Files.createTempDirectory('test') - def config = [workflow:[lineage:[enabled: true, store:[location:folder.toString()]]]] + def config = [lineage:[enabled: true, store:[location:folder.toString()]]] def store = new DefaultLinStore(); def uniqueId = UUID.randomUUID() def scriptFile = folder.resolve("main.nf") @@ -333,7 +333,7 @@ class LinObserverTest extends Specification { def 'should save task data output' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[lineage:[enabled: true, store:[location:folder.toString()]]]] + def config = [lineage:[enabled: true, store:[location:folder.toString()]]] def store = new DefaultLinStore(); def session = Mock(Session) { getConfig()>>config @@ -481,7 +481,7 @@ class LinObserverTest extends Specification { def 'should save workflow output'() { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[lineage:[enabled: true, store:[location:folder.toString()]]]] + def config = [lineage:[enabled: true, store:[location:folder.toString()]]] def store = new DefaultLinStore(); def outputDir = folder.resolve('results') def uniqueId = UUID.randomUUID() diff --git a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy index d9147a8662..ce17b89177 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy +++ 
b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy @@ -53,7 +53,7 @@ class LinCommandImplTest extends Specification{ // clear the environment to avoid the local env pollute the test env SysEnv.push([:]) storeLocation = tmpDir.resolve("store") - configMap = new ConfigMap([workflow: [lineage: [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]]]]) + configMap = new ConfigMap([lineage: [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]]]) } def cleanup() { diff --git a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy index 108739c2eb..0a47291897 100644 --- a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy +++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy @@ -105,7 +105,7 @@ class LinFileSystemProviderTest extends Specification { def 'should get or create a file system' () { given: - def config = [workflow:[lineage:[store:[location: data.toString()]]]] + def config = [lineage:[store:[location: data.toString()]]] Global.session = Mock(Session) { getConfig()>>config } and: def uri = LinPath.asUri('lid://12345') @@ -124,7 +124,7 @@ class LinFileSystemProviderTest extends Specification { def 'should create new byte channel' () { given: - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] def outputMeta = meta.resolve("12345/output.txt") def output = data.resolve("output.txt") output.text = "Hello, World!" 
@@ -182,7 +182,7 @@ class LinFileSystemProviderTest extends Specification { def 'should create new byte channel for LinMetadata' () { given: - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] def outputMeta = meta.resolve("12345") outputMeta.mkdirs() outputMeta.resolve(".data.json").text = '{"type":"WorkflowRun","sessionId":"session","name":"run_name","params":[{"type":"String","name":"param1","value":"value1"}]}' @@ -239,7 +239,7 @@ class LinFileSystemProviderTest extends Specification { def 'should read lid' () { given: - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] def outputMeta = meta.resolve("12345/output.txt") def output = data.resolve("output.txt") output.text = "Hello, World!" @@ -262,7 +262,7 @@ class LinFileSystemProviderTest extends Specification { def 'should not create a directory' () { given: - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] Global.session = Mock(Session) { getConfig()>>config } and: def provider = new LinFileSystemProvider() @@ -288,7 +288,7 @@ class LinFileSystemProviderTest extends Specification { meta.resolve('12345/output1/.data.json').text = '{"type":"FileOutput", "path": "' + output1.toString() + '"}' and: - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] Global.session = Mock(Session) { getConfig()>>config } and: def provider = new LinFileSystemProvider() @@ -325,7 +325,7 @@ class LinFileSystemProviderTest extends Specification { def 'should not delete a file' () { given: - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] Global.session = Mock(Session) { getConfig()>>config } and: def provider = new LinFileSystemProvider() @@ 
-340,7 +340,7 @@ class LinFileSystemProviderTest extends Specification { def 'should not copy a file' () { given: - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] Global.session = Mock(Session) { getConfig()>>config } and: def provider = new LinFileSystemProvider() @@ -355,7 +355,7 @@ class LinFileSystemProviderTest extends Specification { def 'should not move a file' () { given: - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] Global.session = Mock(Session) { getConfig()>>config } and: def provider = new LinFileSystemProvider() @@ -371,7 +371,7 @@ class LinFileSystemProviderTest extends Specification { def 'should check is same file' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[lineage:[store:[location:folder.toString()]]]] + def config = [lineage:[store:[location:folder.toString()]]] Global.session = Mock(Session) { getConfig()>>config } and: def provider = new LinFileSystemProvider() @@ -392,7 +392,7 @@ class LinFileSystemProviderTest extends Specification { def 'should check is hidden file' () { given: def folder = Files.createTempDirectory('test') - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] Global.session = Mock(Session) { getConfig()>>config } and: def output = folder.resolve('path') @@ -416,7 +416,7 @@ class LinFileSystemProviderTest extends Specification { def 'should read file attributes' () { given: - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] def file = data.resolve('abc') file.text = 'Hello' meta.resolve('12345/abc').mkdirs() @@ -444,7 +444,7 @@ class LinFileSystemProviderTest extends Specification { def 'should throw exception in unsupported methods'() { given: - def config = 
[workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] Global.session = Mock(Session) { getConfig()>>config } def provider = new LinFileSystemProvider() @@ -471,7 +471,7 @@ class LinFileSystemProviderTest extends Specification { def 'should throw exception when checking access mode'(){ given: - def config = [workflow:[lineage:[store:[location:wdir.toString()]]]] + def config = [lineage:[store:[location:wdir.toString()]]] Global.session = Mock(Session) { getConfig()>>config } def provider = new LinFileSystemProvider() def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc')) From 854a9de7761cd2605f614733a096884438d22e1f Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 23 Apr 2025 09:58:42 +0200 Subject: [PATCH 72/72] Fix failing tests [ci fast] Signed-off-by: Paolo Di Tommaso --- .../test/groovy/nextflow/cli/CmdLineageTest.groovy | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy index 9efdc0c3dd..54585e7b0d 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy @@ -66,7 +66,7 @@ class CmdLineageTest extends Specification { given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() def historyFile = folder.resolve(".meta/.history") def lidLog = new DefaultLinHistoryLog(historyFile) def uniqueId = UUID.randomUUID() @@ -98,7 +98,7 @@ class CmdLineageTest extends Specification { given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = 
folder.resolve('nextflow.config') - configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() def historyFile = folder.resolve(".meta/.history") Files.createDirectories(historyFile.parent) def launcher = Mock(Launcher){ @@ -127,7 +127,7 @@ class CmdLineageTest extends Specification { given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() def lidFile = folder.resolve(".meta/12345/.data.json") Files.createDirectories(lidFile.parent) def launcher = Mock(Launcher){ @@ -162,7 +162,7 @@ class CmdLineageTest extends Specification { given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } @@ -190,7 +190,7 @@ class CmdLineageTest extends Specification { def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') def outputHtml = folder.resolve('lineage.html') - configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() def launcher = Mock(Launcher){ getOptions() >> new CliOptions(config: [configFile.toString()]) } @@ -269,7 +269,7 @@ class CmdLineageTest extends Specification { given: def folder = 
Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() def lidFile = folder.resolve(".meta/12345/.data.json") Files.createDirectories(lidFile.parent) def launcher = Mock(Launcher){ @@ -304,7 +304,7 @@ class CmdLineageTest extends Specification { given: def folder = Files.createTempDirectory('test').toAbsolutePath() def configFile = folder.resolve('nextflow.config') - configFile.text = "workflow.lineage.enabled = true\nworkflow.lineage.store.location = '$folder'".toString() + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() def lidFile = folder.resolve(".meta/12345/.data.json") Files.createDirectories(lidFile.parent) def launcher = Mock(Launcher){