diff --git a/build.gradle b/build.gradle index 28763a2cea..c2d2570bba 100644 --- a/build.gradle +++ b/build.gradle @@ -237,7 +237,7 @@ task compile { def getRuntimeConfigs() { def names = subprojects - .findAll { prj -> prj.name in ['nextflow','nf-commons','nf-httpfs','nf-lang'] } + .findAll { prj -> prj.name in ['nextflow','nf-commons','nf-httpfs','nf-lang','nf-lineage'] } .collect { it.name } FileCollection result = null @@ -263,7 +263,7 @@ task exportClasspath { def home = System.getProperty('user.home') def all = getRuntimeConfigs() def libs = all.collect { File file -> /*println file.canonicalPath.replace(home, '$HOME');*/ file.canonicalPath; } - ['nextflow','nf-commons','nf-httpfs','nf-lang'].each {libs << file("modules/$it/build/libs/${it}-${version}.jar").canonicalPath } + ['nextflow','nf-commons','nf-httpfs','nf-lang','nf-lineage'].each {libs << file("modules/$it/build/libs/${it}-${version}.jar").canonicalPath } file('.launch.classpath').text = libs.unique().join(':') } } @@ -276,7 +276,7 @@ ext.nexusEmail = project.findProperty('nexusEmail') // `signing.keyId` property needs to be defined in the `gradle.properties` file ext.enableSignArchives = project.findProperty('signing.keyId') -ext.coreProjects = projects( ':nextflow', ':nf-commons', ':nf-httpfs', ':nf-lang' ) +ext.coreProjects = projects( ':nextflow', ':nf-commons', ':nf-httpfs', ':nf-lang', ':nf-lineage' ) configure(coreProjects) { group = 'io.nextflow' diff --git a/modules/nextflow/build.gradle b/modules/nextflow/build.gradle index 1e306e2cc8..9d25cfc9bd 100644 --- a/modules/nextflow/build.gradle +++ b/modules/nextflow/build.gradle @@ -51,7 +51,7 @@ dependencies { api 'io.seqera:lib-trace:0.1.0' testImplementation 'org.subethamail:subethasmtp:3.1.7' - + testImplementation (project(':nf-lineage')) // test configuration testFixturesApi ("org.apache.groovy:groovy-test:4.0.26") { exclude group: 'org.apache.groovy' } testFixturesApi ("org.objenesis:objenesis:3.4") diff --git 
a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index e014d3b4a7..a486123362 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -1137,11 +1137,11 @@ class Session implements ISession { } } - void notifyFilePublish(Path destination, Path source=null) { + void notifyFilePublish(Path destination, Path source, Map annotations) { def copy = new ArrayList(observers) for( TraceObserver observer : copy ) { try { - observer.onFilePublish(destination, source) + observer.onFilePublish(destination, source, annotations) } catch( Exception e ) { log.error "Failed to invoke observer on file publish: $observer", e diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy new file mode 100644 index 0000000000..3c27b9fb81 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy @@ -0,0 +1,291 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package nextflow.cli + +import java.nio.file.Paths + +import com.beust.jcommander.Parameter +import com.beust.jcommander.Parameters +import groovy.transform.CompileStatic +import nextflow.config.ConfigBuilder +import nextflow.config.ConfigMap +import nextflow.exception.AbortOperationException +import nextflow.plugin.Plugins +import org.pf4j.ExtensionPoint + +/** + * Lineage command line interface: dispatches the `lineage` sub-commands to the loaded {@link LinCommand} extension + * + * @author Paolo Di Tommaso + */ +@CompileStatic +@Parameters(commandDescription = "Explore workflows lineage metadata", commandNames = ['li']) +class CmdLineage extends CmdBase implements UsageAware { + + private static final String NAME = 'lineage' + + interface LinCommand extends ExtensionPoint { + void log(ConfigMap config) + void describe(ConfigMap config, List args) + void render(ConfigMap config, List args) + void diff(ConfigMap config, List args) + void find(ConfigMap config, List args) + } + + interface SubCmd { + String getName() + String getDescription() + void apply(List args) + void usage() + } + + private List commands = new ArrayList<>() + + private LinCommand operation + + private ConfigMap config + + CmdLineage() { + commands << new CmdLog() + commands << new CmdDescribe() + commands << new CmdRender() + commands << new CmdDiff() + commands << new CmdFind() + } + + @Parameter(hidden = true) + List args + + @Override + String getName() { + return NAME + } + + @Override + void run() { + if( !args ) { + usage(List.of()) + return + } + // setup the plugins system and load the secrets provider + Plugins.init() + // load the config + this.config = new ConfigBuilder() + .setOptions(launcher.options) + .setBaseDir(Paths.get('.')) + .build() + // init plugins + Plugins.load(config) + // load the command operations + this.operation = Plugins.getExtension(LinCommand) + if( !operation ) + throw new IllegalStateException("Unable to load lineage extensions.") + // consume the first argument + getCmd(args).apply(args.drop(1)) + } + + /** + * Print the 
command usage help + */ + void usage() { + usage(args) + } + + /** + * Print the command usage help + * + * @param args The arguments as entered by the user + */ + void usage(List args) { + if( !args ) { + List result = [] + result << this.getClass().getAnnotation(Parameters).commandDescription() + result << "Usage: nextflow $NAME [options]".toString() + result << '' + result << 'Commands:' + int len = 0 + commands.forEach {len = it.name.size() > len ? it.name.size() : len } + commands.sort(){it.name}.each { result << " ${it.name.padRight(len)}\t${it.description}".toString() } + result << '' + println result.join('\n').toString() + } + else { + def sub = commands.find { it.name == args[0] } + if( sub ) + sub.usage() + else { + throw new AbortOperationException("Unknown $NAME sub-command: ${args[0]}") + } + } + } + + protected SubCmd getCmd(List args) { + + def cmd = commands.find { it.name == args[0] } + if( cmd ) { + return cmd + } + + def matches = commands.collect{ it.name }.closest(args[0]) + def msg = "Unknown $NAME sub-command: ${args[0]}" + if( matches ) + msg += " -- Did you mean one of these?\n" + matches.collect { " $it"}.join('\n') + throw new AbortOperationException(msg) + } + + class CmdLog implements SubCmd { + + @Override + String getName() { + return 'list' + } + + @Override + String getDescription() { + return 'List the executions with lineage enabled' + } + + @Override + void apply(List args) { + if (args.size() != 0) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + operation.log(config) + } + + @Override + void usage() { + println description + println "Usage: nextflow $NAME $name" + } + } + + class CmdDescribe implements SubCmd{ + + @Override + String getName() { + return 'view' + } + + @Override + String getDescription() { + return 'Print the description of a Lineage ID (lid)' + } + + void apply(List args) { + if (args.size() != 1) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + + 
operation.describe(config, args) + } + + @Override + void usage() { + println description + println "Usage: nextflow $NAME $name " + } + } + + class CmdRender implements SubCmd { + + @Override + String getName() { 'render' } + + @Override + String getDescription() { + return 'Render the lineage graph for a workflow output' + } + + void apply(List args) { + if (args.size() < 1 || args.size() > 2) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + + operation.render(config, args) + } + + @Override + void usage() { + println description + println "Usage: nextflow $NAME $name []" + } + + } + + class CmdDiff implements SubCmd { + + @Override + String getName() { 'diff' } + + @Override + String getDescription() { + return 'Show differences between two lineage descriptions' + } + + void apply(List args) { + if (args.size() != 2) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + operation.diff(config, args) + } + + @Override + void usage() { + println description + println "Usage: nextflow $NAME $name " + } + + } + + class CmdFind implements SubCmd { + + @Override + String getName() { 'find' } + + @Override + String getDescription() { + return 'Find lineage metadata descriptions matching with a query' + } + + void apply(List args) { + if (args.size() != 1) { + println("ERROR: Incorrect number of parameters") + usage() + return + } + operation.find(config, args) + } + + @Override + void usage() { + println description + println "Usage: nextflow $NAME $name " + } + + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy index 94cb29ae07..84488c624a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy @@ -354,7 +354,8 @@ class CmdRun extends CmdBase implements HubOptions { runner.session.disableJobsCancellation = getDisableJobsCancellation() final 
isTowerEnabled = config.navigate('tower.enabled') as Boolean - if( isTowerEnabled || log.isTraceEnabled() ) + final isDataEnabled = config.navigate("lineage.enabled") as Boolean + if( isTowerEnabled || isDataEnabled || log.isTraceEnabled() ) runner.session.resolvedConfig = ConfigBuilder.resolveConfig(scriptFile.parent, this) // note config files are collected during the build process // this line should be after `ConfigBuilder#build` diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy index 9d4d64292c..a17ad69325 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy @@ -107,7 +107,8 @@ class Launcher { new CmdSelfUpdate(), new CmdPlugin(), new CmdInspect(), - new CmdLint() + new CmdLint(), + new CmdLineage() ] if(SecretsLoader.isEnabled()) @@ -120,13 +121,20 @@ class Launcher { options = new CliOptions() jcommander = new JCommander(options) - allCommands.each { cmd -> + for( CmdBase cmd : allCommands ) { cmd.launcher = this; - jcommander.addCommand(cmd.name, cmd) + jcommander.addCommand(cmd.name, cmd, aliases(cmd)) } jcommander.setProgramName( APP_NAME ) } + private static final String[] EMPTY = new String[0] + + private static String[] aliases(CmdBase cmd) { + final aliases = cmd.getClass().getAnnotation(Parameters)?.commandNames() + return aliases ?: EMPTY + } + /** * Create the Jcommander 'interpreter' and parse the command line arguments */ diff --git a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidHtmlRenderer.groovy b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidHtmlRenderer.groovy index eb99b44e71..08aae0e3b3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/dag/MermaidHtmlRenderer.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/dag/MermaidHtmlRenderer.groovy @@ -33,7 +33,7 @@ class MermaidHtmlRenderer implements DagRenderer { file.text = 
template.replace('REPLACE_WITH_NETWORK_DATA', network) } - private String readTemplate() { + static String readTemplate() { final writer = new StringWriter() final res = MermaidHtmlRenderer.class.getResourceAsStream('mermaid.dag.template.html') int ch diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index 292d64b25f..47f6429999 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -96,6 +96,11 @@ class PublishOp { final overrides = targetResolver instanceof Closure ? [saveAs: targetResolver] : [path: targetResolver] + + if( publishOpts.annotations instanceof Closure ) { + final annotations = publishOpts.annotations as Closure + overrides.annotations = annotations.call(value) as Map + } final publisher = PublishDir.create(publishOpts + overrides) // publish files @@ -214,7 +219,12 @@ class PublishOp { else { log.warn "Invalid extension '${ext}' for index file '${indexPath}' -- should be CSV, JSON, or YAML" } - session.notifyFilePublish(indexPath) + def annotations = publishOpts.annotations + if( publishOpts.annotations instanceof Closure ) { + final annotationClosure = publishOpts.annotations as Closure + annotations = annotationClosure.call() as Map + } + session.notifyFilePublish(indexPath, null, annotations as Map) } log.trace "Publish operator complete" @@ -257,7 +267,7 @@ class PublishOp { */ protected Object normalizePaths(value, targetResolver) { if( value instanceof Path ) { - return List.of(value.getBaseName(), normalizePath(value, targetResolver)) + return normalizePath(value, targetResolver) } if( value instanceof Collection ) { diff --git a/modules/nextflow/src/main/groovy/nextflow/file/LogicalDataPath.groovy b/modules/nextflow/src/main/groovy/nextflow/file/LogicalDataPath.groovy new file mode 100644 index 0000000000..b980bb460e --- /dev/null +++ 
b/modules/nextflow/src/main/groovy/nextflow/file/LogicalDataPath.groovy @@ -0,0 +1,36 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.file + + +import java.nio.file.Path +/** + * Marker interface for a logical file path associated with a (real) target path. + * + * This interface is used in the {@link nextflow.processor.TaskProcessor} when managing the foreign file staging. + * + * @author Paolo Di Tommaso + */ +interface LogicalDataPath { + /** + * Resolve the logical path to the target path. + * + * @return The real {@link Path} object associated with this logical path. + */ + Path toTargetPath() +} diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index 6d0335f9be..1f519d2a2f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -109,6 +109,11 @@ class PublishDir { */ private def tags + /** + * Annotations to be associated to the target file + */ + private Map annotations + /** * The content type of the file. Currently only supported by AWS S3. 
* This can be either a MIME type content type string or a Boolean value @@ -211,6 +216,9 @@ class PublishDir { if( params.tags != null ) result.tags = params.tags + if( params.annotations != null ) + result.annotations = params.annotations as Map + if( params.contentType instanceof Boolean ) result.contentType = params.contentType else if( params.contentType ) @@ -581,7 +589,7 @@ class PublishDir { } protected void notifyFilePublish(Path destination, Path source=null) { - session.notifyFilePublish(destination, source) + session.notifyFilePublish(destination, source, annotations) } } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index fb8a06142a..8fb26579b3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -15,8 +15,6 @@ */ package nextflow.processor -import nextflow.trace.TraceRecord - import static nextflow.processor.ErrorStrategy.* import java.lang.reflect.InvocationTargetException @@ -79,6 +77,7 @@ import nextflow.file.FileHelper import nextflow.file.FileHolder import nextflow.file.FilePatternSplitter import nextflow.file.FilePorter +import nextflow.file.LogicalDataPath import nextflow.plugin.Plugins import nextflow.processor.tip.TaskTipProvider import nextflow.script.BaseScript @@ -105,6 +104,7 @@ import nextflow.script.params.TupleInParam import nextflow.script.params.TupleOutParam import nextflow.script.params.ValueInParam import nextflow.script.params.ValueOutParam +import nextflow.trace.TraceRecord import nextflow.util.ArrayBag import nextflow.util.BlankSeparatedList import nextflow.util.CacheHelper @@ -1879,6 +1879,13 @@ class TaskProcessor { return Collections.unmodifiableMap(result) } + protected Path resolvePath(Object item) { + final result = normalizeToPath(item) + return result instanceof LogicalDataPath + ? 
result.toTargetPath() + : result + } + /** * An input file parameter can be provided with any value other than a file. * This function normalize a generic value to a {@code Path} create a temporary file @@ -1889,7 +1896,6 @@ class TaskProcessor { * @return The {@code Path} that will be staged in the task working folder */ protected FileHolder normalizeInputToFile( Object input, String altName ) { - /* * when it is a local file, just return a reference holder to it */ @@ -1940,9 +1946,9 @@ class TaskProcessor { for( def item : allItems ) { if( item instanceof Path || coerceToPath ) { - def path = normalizeToPath(item) - def target = executor.isForeignFile(path) ? foreignFiles.addToForeign(path) : path - def holder = new FileHolder(target) + final path = resolvePath(item) + final target = executor.isForeignFile(path) ? foreignFiles.addToForeign(path) : path + final holder = new FileHolder(target) files << holder } else { @@ -2274,7 +2280,7 @@ class TaskProcessor { * @return The list of paths of scripts in the project bin folder referenced in the task command */ @Memoized - protected List getTaskBinEntries(String script) { + List getTaskBinEntries(String script) { List result = [] def tokenizer = new StringTokenizer(script," \t\n\r\f()[]{};&|<>`") while( tokenizer.hasMoreTokens() ) { @@ -2307,7 +2313,7 @@ class TaskProcessor { log.info(buffer.toString()) } - protected Map getTaskGlobalVars(TaskRun task) { + Map getTaskGlobalVars(TaskRun task) { final result = task.getGlobalVars(ownerScript.binding) final directives = getTaskExtensionDirectiveVars(task) result.putAll(directives) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy index f1dc3f1281..20ab76ec36 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy @@ -990,5 +990,9 @@ class TaskRun implements Cloneable { 
CondaConfig getCondaConfig() { return processor.session.getCondaConfig() } + + String getStubSource() { + return config?.getStubBlock()?.source + } } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy b/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy index 17d0b0acaa..1c7898b12c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy @@ -108,6 +108,14 @@ class OutputDsl { private Map opts = [:] + void annotations(Map value) { + setOption('annotations', value) + } + + void annotations(Closure value) { + setOption('annotations', value) + } + void contentType(String value) { setOption('contentType', value) } diff --git a/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy b/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy index fb85e81923..b4b08f3fee 100644 --- a/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/trace/TraceObserver.groovy @@ -154,4 +154,19 @@ interface TraceObserver { default void onFilePublish(Path destination, Path source){ onFilePublish(destination) } + + /** + * Method that is invoked when an output file is published. The default implementation ignores the annotations and delegates to the two-argument overload. + * + * @param destination + * The destination path at `publishDir` folder. + * @param source + * The source path at `workDir` folder. + * @param annotations + * The annotations attached to this file + */ + default void onFilePublish(Path destination, Path source, Map annotations) { + onFilePublish(destination, source) + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy b/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy index c698676caf..b14b2cfe81 100644 --- a/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy @@ -62,14 +62,25 @@ class CsvWriter { ? 
record.subMap(columns).values() : record.values() } + else if( isSerializable(record) ) { + values = [ record ] + } else { - throw new IllegalArgumentException('Records must be list or map objects') + throw new IllegalArgumentException("Record of type `${record.class.name}` can not be serialized to CSV") } path << values.collect(v -> "\"${toCsvString(v)}\"").join(sep) << '\n' } } + private static boolean isSerializable(value) { + return value == null + || value instanceof Boolean + || value instanceof CharSequence + || value instanceof Number + || value instanceof Path + } + private static String toCsvString(value) { if( value == null ) return "" diff --git a/modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy b/modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy new file mode 100644 index 0000000000..7da3c5a925 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/util/PathNormalizer.groovy @@ -0,0 +1,93 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.util + +import groovy.transform.CompileStatic +import nextflow.script.WorkflowMetadata + +import java.nio.file.Path + +/** + * + * @author Ben Sherman + */ +@CompileStatic +class PathNormalizer { + + private URL repository + + private String commitId + + private String projectDir + + private String workDir + + PathNormalizer(WorkflowMetadata metadata) { + repository = metadata.repository ? 
new URL(metadata.repository) : null + commitId = metadata.commitId + projectDir = metadata.projectDir.normalize().toUriString() + workDir = metadata.workDir.normalize().toUriString() + } + + /** + * Normalize paths against the original remote URL, or + * work directory, where appropriate. + * + * @param path + */ + String normalizePath(Path path) { + normalizePath(path.toUriString()) + } + + String normalizePath(String path) { + if(!path) + return null + // replace only the leading work directory prefix with a relative path + if( path.startsWith(workDir) ) + return 'work' + path.substring(workDir.length()) + + // replace project directory with source URL (if applicable) + if( repository && path.startsWith(projectDir) ) + return getProjectSourceUrl(path) + + // encode local absolute paths as file URLs + if( path.startsWith('/') ) + return 'file://' + path + + return path + } + + /** + * Get the source URL for a project asset; only the leading project directory prefix is substituted. + * + * @param path + */ + private String getProjectSourceUrl(String path) { + switch( repository.host ) { + case 'bitbucket.org': + return "${repository}/src/${commitId}" + path.substring(projectDir.length()) + case 'github.com': + return "${repository}/tree/${commitId}" + path.substring(projectDir.length()) + case 'gitlab.com': + return "${repository}/-/tree/${commitId}" + path.substring(projectDir.length()) + default: + return path + } + } + +} diff --git a/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html b/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html index 0ab1d9475e..ebbf8e834a 100644 --- a/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html +++ b/modules/nextflow/src/main/resources/nextflow/dag/mermaid.dag.template.html @@ -36,7 +36,7 @@ REPLACE_WITH_NETWORK_DATA diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy new file mode 100644 index 0000000000..54585e7b0d --- /dev/null +++ 
b/modules/nextflow/src/test/groovy/nextflow/cli/CmdLineageTest.groovy @@ -0,0 +1,338 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.cli + +import nextflow.SysEnv +import nextflow.dag.MermaidHtmlRenderer +import nextflow.lineage.DefaultLinHistoryLog +import nextflow.lineage.LinHistoryRecord +import nextflow.lineage.LinStoreFactory +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.FileOutput +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.TaskRun +import nextflow.lineage.serde.LinEncoder +import nextflow.plugin.Plugins +import java.nio.file.Files +import java.time.OffsetDateTime +import org.junit.Rule +import spock.lang.Specification +import test.OutputCapture + +/** + * CLI lineage Tests + * + * @author Jorge Ejarque + */ +class CmdLineageTest extends Specification { + + def setup() { + // clear the environment to avoid the local env pollute the test env + SysEnv.push([:]) + } + + def cleanup() { + Plugins.stop() + LinStoreFactory.reset() + SysEnv.pop() + } + + def setupSpec() { + LinStoreFactory.reset() + } + + /* + * Read more http://mrhaki.blogspot.com.es/2015/02/spocklight-capture-and-assert-system.html + */ + @Rule + OutputCapture capture = new OutputCapture() + + def 'should print executions lids' (){ + given: + def folder = Files.createTempDirectory('test').toAbsolutePath() + def configFile = folder.resolve('nextflow.config') + 
configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() + def historyFile = folder.resolve(".meta/.history") + def lidLog = new DefaultLinHistoryLog(historyFile) + def uniqueId = UUID.randomUUID() + def date = new Date(); + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + lidLog.write("run_name", uniqueId, "lid://123456", date) + def recordEntry = "${LinHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tlid://123456".toString() + when: + def lidCmd = new CmdLineage(launcher: launcher, args: ["list"]) + lidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 2 + stdout[1] == recordEntry + + cleanup: + folder?.deleteDir() + } + + def 'should print no history' (){ + given: + def folder = Files.createTempDirectory('test').toAbsolutePath() + def configFile = folder.resolve('nextflow.config') + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() + def historyFile = folder.resolve(".meta/.history") + Files.createDirectories(historyFile.parent) + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + when: + def lidCmd = new CmdLineage(launcher: launcher, args: ["list"]) + lidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('WARN') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 1 + stdout[0] == "No workflow runs found in lineage history log" + + cleanup: + folder?.deleteDir() + } + + def 'should show lid content' (){ + given: + def folder = Files.createTempDirectory('test').toAbsolutePath() + def configFile = folder.resolve('nextflow.config') + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() + def lidFile = folder.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + def time = OffsetDateTime.now() + def encoder = new LinEncoder().withPrettyPrint(true) + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam","lid://12345/","lid://123987/", 1234, time, time, null) + def jsonSer = encoder.encode(entry) + def expectedOutput = jsonSer + lidFile.text = jsonSer + when: + def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid://12345"]) + lidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == expectedOutput.readLines().size() + stdout.join('\n') == expectedOutput + + cleanup: + folder?.deleteDir() + } + + def 'should warn if no lid content' (){ + given: + def folder = Files.createTempDirectory('test').toAbsolutePath() + def configFile = folder.resolve('nextflow.config') + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + + when: + def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid://12345"]) + lidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0] == "Error loading lid://12345 - Lineage object 12345 not found" + + cleanup: + folder?.deleteDir() + } + + def 'should get lineage lid content' (){ + given: + def folder = Files.createTempDirectory('test').toAbsolutePath() + def configFile = folder.resolve('nextflow.config') + def outputHtml = folder.resolve('lineage.html') + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + def lidFile = folder.resolve(".meta/12345/file.bam/.data.json") + def lidFile2 = folder.resolve(".meta/123987/file.bam/.data.json") + def lidFile3 = folder.resolve(".meta/123987/.data.json") + def lidFile4 = folder.resolve(".meta/45678/output.txt/.data.json") + def lidFile5 = folder.resolve(".meta/45678/.data.json") + Files.createDirectories(lidFile.parent) + Files.createDirectories(lidFile2.parent) + Files.createDirectories(lidFile3.parent) + Files.createDirectories(lidFile4.parent) + Files.createDirectories(lidFile5.parent) + def 
encoder = new LinEncoder() + def time = OffsetDateTime.now() + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam", "lid://45678",null, 1234, time, time, null) + lidFile.text = encoder.encode(entry) + entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987", "lid://45678", "lid://123987", 1234, time, time, null) + lidFile2.text = encoder.encode(entry) + entry = new TaskRun("u345-2346-1stw2", "foo", + new Checksum("abcde2345","nextflow","standard"), + 'this is a script', + [new Parameter( "val", "sample_id","ggal_gut"), + new Parameter("path","reads",["lid://45678/output.txt"])], + null, null, null, null, [:],[], null) + lidFile3.text = encoder.encode(entry) + entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://45678", "lid://45678", null, 1234, time, time, null) + lidFile4.text = encoder.encode(entry) + entry = new TaskRun("u345-2346-1stw2", "bar", + new Checksum("abfs2556","nextflow","standard"), + 'this is a script', + null,null, null, null, null, [:],[], null) + lidFile5.text = encoder.encode(entry) + final network = """flowchart BT + lid://12345/file.bam@{shape: document, label: "lid://12345/file.bam"} + lid://123987/file.bam@{shape: document, label: "lid://123987/file.bam"} + lid://123987@{shape: process, label: "foo [lid://123987]"} + ggal_gut@{shape: document, label: "ggal_gut"} + lid://45678/output.txt@{shape: document, label: "lid://45678/output.txt"} + lid://45678@{shape: process, label: "bar [lid://45678]"} + + lid://123987/file.bam -->lid://12345/file.bam + lid://123987 -->lid://123987/file.bam + ggal_gut -->lid://123987 + lid://45678/output.txt -->lid://123987 + lid://45678 -->lid://45678/output.txt +""" + final template = MermaidHtmlRenderer.readTemplate() + def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) + + when: + def lidCmd = new CmdLineage(launcher: launcher, args: 
["render", "lid://12345/file.bam", outputHtml.toString()]) + lidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0] == "Linage graph for lid://12345/file.bam rendered in ${outputHtml}" + outputHtml.exists() + outputHtml.text == expectedOutput + + cleanup: + folder?.deleteDir() + + } + + def 'should show query results'(){ + given: + def folder = Files.createTempDirectory('test').toAbsolutePath() + def configFile = folder.resolve('nextflow.config') + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() + def lidFile = folder.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + def encoder = new LinEncoder().withPrettyPrint(true) + def time = OffsetDateTime.now() + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam", "lid://12345", "lid://123987/", 1234, time, time, null) + def jsonSer = encoder.encode(entry) + def expectedOutput = jsonSer + lidFile.text = jsonSer + when: + def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid:///?type=FileOutput"]) + lidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == expectedOutput.readLines().size() + stdout.join('\n') == expectedOutput + + cleanup: + folder?.deleteDir() + } + + def 'should show query results'(){ + given: + def folder = Files.createTempDirectory('test').toAbsolutePath() + def configFile = folder.resolve('nextflow.config') + configFile.text = "lineage.enabled = true\nlineage.store.location = '$folder'".toString() + def lidFile = folder.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def launcher = Mock(Launcher){ + getOptions() >> new CliOptions(config: [configFile.toString()]) + } + def encoder = new LinEncoder().withPrettyPrint(true) + def time = OffsetDateTime.now() + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam", "lid://12345", "lid://123987/", 1234, time, time, null) + def jsonSer = encoder.encode(entry) + def expectedOutput = jsonSer + lidFile.text = jsonSer + when: + def lidCmd = new CmdLineage(launcher: launcher, args: ["view", "lid:///?type=FileOutput"]) + lidCmd.run() + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == expectedOutput.readLines().size() + stdout.join('\n') == expectedOutput + + cleanup: + folder?.deleteDir() + } + +} diff --git a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy index 5f51de20f4..de538fd5bc 100644 --- a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy @@ -77,12 +77,12 @@ class OutputDslTest extends Specification { outputDir.resolve('foo/file1.txt').text == 'Hello' outputDir.resolve('barbar/file2.txt').text == 'world' outputDir.resolve('index.csv').text == """\ - "file2","${outputDir}/barbar/file2.txt" + "${outputDir}/barbar/file2.txt" """.stripIndent() and: - 1 * session.notifyFilePublish(outputDir.resolve('foo/file1.txt'), file1) - 1 * session.notifyFilePublish(outputDir.resolve('barbar/file2.txt'), file2) - 1 * session.notifyFilePublish(outputDir.resolve('index.csv')) + 1 * session.notifyFilePublish(outputDir.resolve('foo/file1.txt'), file1, null) + 1 * session.notifyFilePublish(outputDir.resolve('barbar/file2.txt'), file2, null) + 1 * session.notifyFilePublish(outputDir.resolve('index.csv'), null, null) cleanup: SysEnv.pop() diff --git a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy index 255d52bdd3..6f8f6fb777 100644 --- a/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy +++ b/modules/nf-commons/src/main/nextflow/extension/FilesEx.groovy @@ -494,6 +494,7 @@ class FilesEx { return true } catch(IOException e) { + log.debug "Failed to create directory '$self'", e return false } } diff --git a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy index f76e95d55b..e2f7651019 100644 --- a/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy +++ 
b/modules/nf-commons/src/main/nextflow/file/FileHelper.groovy @@ -238,7 +238,7 @@ class FileHelper { return !(path.getFileSystem().provider().scheme in UNSUPPORTED_GLOB_WILDCARDS) } - static Path toCanonicalPath(value) { + static Path toPath(value) { if( value==null ) return null @@ -252,6 +252,14 @@ class FileHelper { else { throw new IllegalArgumentException("Unexpected path value: '$value' [${value.getClass().getName()}]") } + return result + } + + static Path toCanonicalPath(value) { + if( value==null ) + return null + + Path result = toPath(value) if( result.fileSystem != FileSystems.default ) { // remote file paths are expected to be absolute by definition @@ -1163,4 +1171,23 @@ class FileHelper { return null } + public static HashCode getTaskHashFromPath(Path sourcePath, Path workPath) { + assert sourcePath + assert workPath + if( !sourcePath.startsWith(workPath) ) + return null + final relativePath = workPath.relativize(sourcePath) + if( relativePath.getNameCount() < 2 ) + return null + final bucket = relativePath.getName(0).toString() + if( bucket.size() != 2 ) + return null + final strHash = bucket + relativePath.getName(1).toString() + try { + return HashCode.fromString(strHash) + } catch (Throwable e) { + log.debug("String '${strHash}' is not a valid hash", e) + return null + } + } } diff --git a/modules/nf-commons/src/main/nextflow/plugin/PluginsFacade.groovy b/modules/nf-commons/src/main/nextflow/plugin/PluginsFacade.groovy index 8dbe8b5735..d5889f69e0 100644 --- a/modules/nf-commons/src/main/nextflow/plugin/PluginsFacade.groovy +++ b/modules/nf-commons/src/main/nextflow/plugin/PluginsFacade.groovy @@ -299,10 +299,12 @@ class PluginsFacade implements PluginStateListener { * The extension with higher priority appears first (lower index) */ def List getPriorityExtensions(Class type,String group=null) { - def result = getExtensions(type) + def extensions = getExtensions(type) if( group ) - result = result.findAll(it -> group0(it)==group ) - return 
result.sort( it -> priority0(it) ) + extensions = extensions.findAll(it -> group0(it)==group ) + final result = extensions.sort( it -> priority0(it) ) + log.debug "Discovered extensions for type ${type.getName()}: ${extensions.join(',')}" + return result } protected int priority0(Object it) { diff --git a/modules/nf-commons/src/main/nextflow/serde/Encoder.groovy b/modules/nf-commons/src/main/nextflow/serde/Encoder.groovy new file mode 100644 index 0000000000..cdbc8fc5cb --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/serde/Encoder.groovy @@ -0,0 +1,45 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.serde + +/** + * An interface for encoding and decoding objects between two types. + * + * @param the type of the original object to be encoded. + * @param the type of the encoded representation. + * + * @author Paolo Di Tommaso + */ +interface Encoder { + + /** + * Encodes an object of type {@code T} into its corresponding encoded representation of type {@code S}. + * + * @param object the object to encode + * @return the encoded representation of the object + */ + S encode(T object) + + /** + * Decodes an encoded representation of type {@code S} back into its original form of type {@code T}. 
+ * + * @param encoded the encoded representation to decode + * @return the decoded object + */ + T decode(S encoded) + +} diff --git a/modules/nf-commons/src/main/nextflow/serde/JsonSerializable.groovy b/modules/nf-commons/src/main/nextflow/serde/JsonSerializable.groovy new file mode 100644 index 0000000000..8ec7292156 --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/serde/JsonSerializable.groovy @@ -0,0 +1,26 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.serde + +/** + * Implements a marker interface for Json serialization objects. + * + * @author Paolo Di Tommaso + */ +interface JsonSerializable { + +} diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy new file mode 100644 index 0000000000..8339093f1a --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/serde/gson/GStringSerializer.groovy @@ -0,0 +1,39 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.serde.gson + +import java.lang.reflect.Type + +import com.google.gson.JsonElement +import com.google.gson.JsonPrimitive +import com.google.gson.JsonSerializationContext +import com.google.gson.JsonSerializer +import groovy.transform.CompileStatic +/** + * Implements a Gson serializer for Groovy GString + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class GStringSerializer implements JsonSerializer { + + @Override + JsonElement serialize(GString src, Type typeOfSrc, JsonSerializationContext context) { + // Convert GString to plain String + return new JsonPrimitive(src.toString()) + } +} diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy new file mode 100644 index 0000000000..47d583c368 --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/serde/gson/GsonEncoder.groovy @@ -0,0 +1,102 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.serde.gson + +import java.lang.reflect.Type +import java.time.Instant +import java.time.OffsetDateTime + +import com.google.gson.Gson +import com.google.gson.GsonBuilder +import com.google.gson.TypeAdapterFactory +import groovy.transform.CompileStatic +import nextflow.serde.Encoder +import nextflow.util.TypeHelper +import org.codehaus.groovy.runtime.GStringImpl + +/** + * Implement a JSON encoder based on Google Gson + * + * @author Paolo Di Tommaso + */ +@CompileStatic +abstract class GsonEncoder implements Encoder { + + private Type type + + private TypeAdapterFactory factory + + private boolean prettyPrint + + private boolean serializeNulls + + private volatile Gson gson + + protected GsonEncoder() { + this.type = TypeHelper.getGenericType(this, 0) + } + + GsonEncoder withTypeAdapterFactory(TypeAdapterFactory factory) { + this.factory = factory + return this + } + + GsonEncoder withPrettyPrint(boolean value) { + this.prettyPrint = value + return this + } + + GsonEncoder withSerializeNulls(boolean value) { + this.serializeNulls = value + return this + } + + private Gson gson0() { + if( gson ) + return gson + synchronized (this) { + if( gson ) + return gson + return gson = create0() + } + } + + private Gson create0() { + final builder = new GsonBuilder() + builder.registerTypeAdapter(Instant.class, new InstantAdapter()) + builder.registerTypeAdapter(OffsetDateTime.class, new OffsetDateTimeAdapter()) + builder.registerTypeAdapter(GStringImpl.class, new GStringSerializer()) + if( factory ) + builder.registerTypeAdapterFactory(factory) + if( prettyPrint ) + builder.setPrettyPrinting() + if( serializeNulls ) + builder.serializeNulls() + return builder.create() + } + + @Override + String encode(T object) { + return gson0().toJson(object, type) + } + + @Override + T decode(String json) { + gson0().fromJson(json, type) + } + +} diff --git a/modules/nf-commons/src/main/nextflow/util/GsonInstantAdapter.groovy 
b/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy similarity index 80% rename from modules/nf-commons/src/main/nextflow/util/GsonInstantAdapter.groovy rename to modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy index 5415c465a3..80b64676a2 100644 --- a/modules/nf-commons/src/main/nextflow/util/GsonInstantAdapter.groovy +++ b/modules/nf-commons/src/main/nextflow/serde/gson/InstantAdapter.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,15 +12,15 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ -package nextflow.util +package nextflow.serde.gson import java.time.Instant import com.google.gson.TypeAdapter import com.google.gson.stream.JsonReader +import com.google.gson.stream.JsonToken import com.google.gson.stream.JsonWriter import groovy.transform.CompileStatic @@ -30,7 +30,7 @@ import groovy.transform.CompileStatic * @author Paolo Di Tommaso */ @CompileStatic -class GsonInstantAdapter extends TypeAdapter { +class InstantAdapter extends TypeAdapter { @Override void write(JsonWriter writer, Instant value) throws IOException { writer.value(value?.toString()) @@ -38,6 +38,10 @@ class GsonInstantAdapter extends TypeAdapter { @Override Instant read(JsonReader reader) throws IOException { + if( reader.peek() == JsonToken.NULL ) { + reader.nextNull() + return null + } return Instant.parse(reader.nextString()) } } diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy b/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy new file mode 100644 index 0000000000..572f03d44d --- /dev/null +++ 
b/modules/nf-commons/src/main/nextflow/serde/gson/OffsetDateTimeAdapter.groovy @@ -0,0 +1,48 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.serde.gson + +import java.time.Instant +import java.time.OffsetDateTime + +import com.google.gson.TypeAdapter +import com.google.gson.stream.JsonReader +import com.google.gson.stream.JsonToken +import com.google.gson.stream.JsonWriter +import groovy.transform.CompileStatic + +/** + * Implements a Gson adapter for {@link OffsetDateTime} + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class OffsetDateTimeAdapter extends TypeAdapter { + @Override + void write(JsonWriter writer, OffsetDateTime value) throws IOException { + writer.value(value?.toString()) + } + + @Override + OffsetDateTime read(JsonReader reader) throws IOException { + if (reader.peek() == JsonToken.NULL) { + reader.nextNull() + return null + } + return OffsetDateTime.parse(reader.nextString()) + } +} diff --git a/modules/nf-commons/src/main/nextflow/serde/gson/RuntimeTypeAdapterFactory.java b/modules/nf-commons/src/main/nextflow/serde/gson/RuntimeTypeAdapterFactory.java new file mode 100644 index 0000000000..f087a35eeb --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/serde/gson/RuntimeTypeAdapterFactory.java @@ -0,0 +1,342 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.serde.gson; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; + +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonPrimitive; +import com.google.gson.TypeAdapter; +import com.google.gson.TypeAdapterFactory; +import com.google.gson.reflect.TypeToken; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; + +/* + * NOTE: this class is copied from Gson extra module which is not included in the default + * library distribution. + * + * See + * https://github.com/google/gson/blob/main/extras/src/main/java/com/google/gson/typeadapters/RuntimeTypeAdapterFactory.java + * + */ + + +/** + * Adapts values whose runtime type may differ from their declaration type. This is necessary when a + * field's type is not the same type that GSON should create when deserializing that field. For + * example, consider these types: + * + *
{@code
+ * abstract class Shape {
+ *   int x;
+ *   int y;
+ * }
+ * class Circle extends Shape {
+ *   int radius;
+ * }
+ * class Rectangle extends Shape {
+ *   int width;
+ *   int height;
+ * }
+ * class Diamond extends Shape {
+ *   int width;
+ *   int height;
+ * }
+ * class Drawing {
+ *   Shape bottomShape;
+ *   Shape topShape;
+ * }
+ * }
+ * + *

Without additional type information, the serialized JSON is ambiguous. Is the bottom shape in + * this drawing a rectangle or a diamond? + * + *

{@code
+ * {
+ *   "bottomShape": {
+ *     "width": 10,
+ *     "height": 5,
+ *     "x": 0,
+ *     "y": 0
+ *   },
+ *   "topShape": {
+ *     "radius": 2,
+ *     "x": 4,
+ *     "y": 1
+ *   }
+ * }
+ * }
+ * + * This class addresses this problem by adding type information to the serialized JSON and honoring + * that type information when the JSON is deserialized: + * + *
{@code
+ * {
+ *   "bottomShape": {
+ *     "type": "Diamond",
+ *     "width": 10,
+ *     "height": 5,
+ *     "x": 0,
+ *     "y": 0
+ *   },
+ *   "topShape": {
+ *     "type": "Circle",
+ *     "radius": 2,
+ *     "x": 4,
+ *     "y": 1
+ *   }
+ * }
+ * }
+ * + * Both the type field name ({@code "type"}) and the type labels ({@code "Rectangle"}) are + * configurable. + * + *

Registering Types

+ * + * Create a {@code RuntimeTypeAdapterFactory} by passing the base type and type field name to the + * {@link #of} factory method. If you don't supply an explicit type field name, {@code "type"} will + * be used. + * + *
{@code
+ * RuntimeTypeAdapterFactory<Shape> shapeAdapterFactory
+ *     = RuntimeTypeAdapterFactory.of(Shape.class, "type");
+ * }
+ * + * Next register all of your subtypes. Every subtype must be explicitly registered. This protects + * your application from injection attacks. If you don't supply an explicit type label, the type's + * simple name will be used. + * + *
{@code
+ * shapeAdapterFactory.registerSubtype(Rectangle.class, "Rectangle");
+ * shapeAdapterFactory.registerSubtype(Circle.class, "Circle");
+ * shapeAdapterFactory.registerSubtype(Diamond.class, "Diamond");
+ * }
+ * + * Finally, register the type adapter factory in your application's GSON builder: + * + *
{@code
+ * Gson gson = new GsonBuilder()
+ *     .registerTypeAdapterFactory(shapeAdapterFactory)
+ *     .create();
+ * }
+ * + * Like {@code GsonBuilder}, this API supports chaining: + * + *
{@code
+ * RuntimeTypeAdapterFactory<Shape> shapeAdapterFactory = RuntimeTypeAdapterFactory.of(Shape.class)
+ *     .registerSubtype(Rectangle.class)
+ *     .registerSubtype(Circle.class)
+ *     .registerSubtype(Diamond.class);
+ * }
+ * + *

Serialization and deserialization

+ * + * In order to serialize and deserialize a polymorphic object, you must specify the base type + * explicitly. + * + *
{@code
+ * Diamond diamond = new Diamond();
+ * String json = gson.toJson(diamond, Shape.class);
+ * }
+ * + * And then: + * + *
{@code
+ * Shape shape = gson.fromJson(json, Shape.class);
+ * }
+ */ +public final class RuntimeTypeAdapterFactory implements TypeAdapterFactory { + private final Class baseType; + private final String typeFieldName; + private final Map> labelToSubtype = new LinkedHashMap<>(); + private final Map, String> subtypeToLabel = new LinkedHashMap<>(); + private final boolean maintainType; + private boolean recognizeSubtypes; + + private RuntimeTypeAdapterFactory(Class baseType, String typeFieldName, boolean maintainType) { + if (typeFieldName == null || baseType == null) { + throw new NullPointerException(); + } + this.baseType = baseType; + this.typeFieldName = typeFieldName; + this.maintainType = maintainType; + } + + /** + * Creates a new runtime type adapter for {@code baseType} using {@code typeFieldName} as the type + * field name. Type field names are case sensitive. + * + * @param maintainType true if the type field should be included in deserialized objects + */ + public static RuntimeTypeAdapterFactory of( + Class baseType, String typeFieldName, boolean maintainType) { + return new RuntimeTypeAdapterFactory<>(baseType, typeFieldName, maintainType); + } + + /** + * Creates a new runtime type adapter for {@code baseType} using {@code typeFieldName} as the type + * field name. Type field names are case sensitive. + */ + public static RuntimeTypeAdapterFactory of(Class baseType, String typeFieldName) { + return new RuntimeTypeAdapterFactory<>(baseType, typeFieldName, false); + } + + /** + * Creates a new runtime type adapter for {@code baseType} using {@code "type"} as the type field + * name. + */ + public static RuntimeTypeAdapterFactory of(Class baseType) { + return new RuntimeTypeAdapterFactory<>(baseType, "type", false); + } + + /** + * Ensures that this factory will handle not just the given {@code baseType}, but any subtype of + * that type. 
+ */ + @CanIgnoreReturnValue + public RuntimeTypeAdapterFactory recognizeSubtypes() { + this.recognizeSubtypes = true; + return this; + } + + /** + * Registers {@code type} identified by {@code label}. Labels are case sensitive. + * + * @throws IllegalArgumentException if either {@code type} or {@code label} have already been + * registered on this type adapter. + */ + @CanIgnoreReturnValue + public RuntimeTypeAdapterFactory registerSubtype(Class type, String label) { + if (type == null || label == null) { + throw new NullPointerException(); + } + if (subtypeToLabel.containsKey(type) || labelToSubtype.containsKey(label)) { + throw new IllegalArgumentException("types and labels must be unique"); + } + labelToSubtype.put(label, type); + subtypeToLabel.put(type, label); + return this; + } + + /** + * Registers {@code type} identified by its {@link Class#getSimpleName simple name}. Labels are + * case sensitive. + * + * @throws IllegalArgumentException if either {@code type} or its simple name have already been + * registered on this type adapter. + */ + @CanIgnoreReturnValue + public RuntimeTypeAdapterFactory registerSubtype(Class type) { + return registerSubtype(type, type.getSimpleName()); + } + + @Override + public TypeAdapter create(Gson gson, TypeToken type) { + if (type == null) { + return null; + } + Class rawType = type.getRawType(); + boolean handle = + recognizeSubtypes ? 
baseType.isAssignableFrom(rawType) : baseType.equals(rawType); + if (!handle) { + return null; + } + + TypeAdapter jsonElementAdapter = gson.getAdapter(JsonElement.class); + Map> labelToDelegate = new LinkedHashMap<>(); + Map, TypeAdapter> subtypeToDelegate = new LinkedHashMap<>(); + for (Map.Entry> entry : labelToSubtype.entrySet()) { + TypeAdapter delegate = gson.getDelegateAdapter(this, TypeToken.get(entry.getValue())); + labelToDelegate.put(entry.getKey(), delegate); + subtypeToDelegate.put(entry.getValue(), delegate); + } + + return new TypeAdapter() { + @Override + public R read(JsonReader in) throws IOException { + JsonElement jsonElement = jsonElementAdapter.read(in); + JsonElement labelJsonElement; + if (maintainType) { + labelJsonElement = jsonElement.getAsJsonObject().get(typeFieldName); + } else { + labelJsonElement = jsonElement.getAsJsonObject().remove(typeFieldName); + } + + if (labelJsonElement == null) { + throw new JsonParseException( + "cannot deserialize " + + baseType + + " because it does not define a field named " + + typeFieldName); + } + String label = labelJsonElement.getAsString(); + @SuppressWarnings("unchecked") // registration requires that subtype extends T + TypeAdapter delegate = (TypeAdapter) labelToDelegate.get(label); + if (delegate == null) { + throw new JsonParseException( + "cannot deserialize " + + baseType + + " subtype named " + + label + + "; did you forget to register a subtype?"); + } + return delegate.fromJsonTree(jsonElement); + } + + @Override + public void write(JsonWriter out, R value) throws IOException { + Class srcType = value.getClass(); + String label = subtypeToLabel.get(srcType); + @SuppressWarnings("unchecked") // registration requires that subtype extends T + TypeAdapter delegate = (TypeAdapter) subtypeToDelegate.get(srcType); + if (delegate == null) { + throw new JsonParseException( + "cannot serialize " + srcType.getName() + "; did you forget to register a subtype?"); + } + JsonObject jsonObject = 
delegate.toJsonTree(value).getAsJsonObject(); + + if (maintainType) { + jsonElementAdapter.write(out, jsonObject); + return; + } + + JsonObject clone = new JsonObject(); + + if (jsonObject.has(typeFieldName)) { + throw new JsonParseException( + "cannot serialize " + + srcType.getName() + + " because it already defines a field named " + + typeFieldName); + } + clone.add(typeFieldName, new JsonPrimitive(label)); + + for (Map.Entry e : jsonObject.entrySet()) { + clone.add(e.getKey(), e.getValue()); + } + jsonElementAdapter.write(out, clone); + } + }.nullSafe(); + } +} diff --git a/modules/nf-commons/src/main/nextflow/util/GsonHelper.groovy b/modules/nf-commons/src/main/nextflow/util/GsonHelper.groovy index 3ba3bd04aa..1551f97818 100644 --- a/modules/nf-commons/src/main/nextflow/util/GsonHelper.groovy +++ b/modules/nf-commons/src/main/nextflow/util/GsonHelper.groovy @@ -23,19 +23,23 @@ import com.google.gson.Gson import com.google.gson.GsonBuilder import groovy.transform.CompileStatic import groovy.transform.Memoized +import nextflow.serde.gson.InstantAdapter /** * Implements helper for Gson ser-deserialization - * + * + * Deprecated. Use {@link nextflow.serde.gson.GsonEncoder} instead + * * @author Paolo Di Tommaso */ +@Deprecated @CompileStatic class GsonHelper { @Memoized static protected Gson gson() { new GsonBuilder() - .registerTypeAdapter(Instant, new GsonInstantAdapter()) + .registerTypeAdapter(Instant, new InstantAdapter()) .create() } diff --git a/modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy b/modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy new file mode 100644 index 0000000000..f7ab1604ec --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/util/TypeHelper.groovy @@ -0,0 +1,58 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.util + +import java.lang.reflect.ParameterizedType +import java.lang.reflect.Type + +import groovy.transform.CompileStatic + +/** + * A utility class that provides helper methods for working with generic types at runtime. + *

+ * This class is designed to extract type information from objects that have generic superclasses. + *

+ * + * @author Paolo Di Tommaso + */ +@CompileStatic +class TypeHelper { + + /** + * Retrieves the generic type at the specified index from the given object's superclass. + * + *

This method assumes that the object's class extends a parameterized type, + * and it returns the type argument at the given index.

+ * + * @param object the object whose generic type is to be retrieved + * @param index the index of the generic type parameter (starting from 0) + * @return the {@link Type} representing the generic type at the specified index + * + * @example + *
+     * class ExampleClass extends GenericBase<String, Integer> {}
+     *
+     * Type type = TypeHelper.getGenericType(new ExampleClass(), 1);
+     * System.out.println(type); // Output: class java.lang.Integer
+     * 
+ */ + static Type getGenericType(Object object, int index) { + final params = (ParameterizedType) (object.getClass().getGenericSuperclass()) + return params.getActualTypeArguments()[index] + } + +} diff --git a/modules/nf-commons/src/test/nextflow/serde/GsonEncoderTest.groovy b/modules/nf-commons/src/test/nextflow/serde/GsonEncoderTest.groovy new file mode 100644 index 0000000000..87327c082b --- /dev/null +++ b/modules/nf-commons/src/test/nextflow/serde/GsonEncoderTest.groovy @@ -0,0 +1,81 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.serde + +import java.time.Instant +import java.time.OffsetDateTime +import java.time.ZoneOffset + +import groovy.transform.EqualsAndHashCode +import nextflow.serde.gson.GsonEncoder +import spock.lang.Specification +/** + * Round-trip (encode then decode) tests for GsonEncoder, covering a plain object and polymorphic types registered by MyEncoder + * @author Paolo Di Tommaso + */ +class GsonEncoderTest extends Specification { + + @EqualsAndHashCode + static class Foo { + String name + Instant timestamp + OffsetDateTime datetime + } + + def 'should serialize-deserialize an object' () { + given: + def encoder = new GsonEncoder() { } + def ts = Instant.ofEpochSecond(1742638384) + def dt = ts.atOffset(ZoneOffset.UTC) + def foo = new Foo(name:'Yo!', timestamp: ts, datetime: dt) + when: + def json = encoder.encode(foo) + then: + json == '{"name":"Yo!","timestamp":"2025-03-22T10:13:04Z","datetime":"2025-03-22T10:13:04Z"}' + encoder.decode(json) == foo + } + + def 'should encode and decode polymorphic class/1'() { + given: + def encoder = new MyEncoder() + def dog = new Dog("bau", 10) + when: + def json = encoder.encode(dog) + then: + json == '{"@type":"Dog","name":"bau","barkVolume":10}' + + when: + def animal = encoder.decode(json) + then: + animal == dog + } + + def 'should encode and decode polymorphic class/2'() { + given: + def encoder = new MyEncoder() + def cat = new Cat("bau", true) + when: + def json = encoder.encode(cat) + then: + json == '{"@type":"Cat","name":"bau","likesSun":true}' + + when: + def animal = encoder.decode(json) + then: + animal == cat + } +} diff --git a/modules/nf-commons/src/test/nextflow/serde/MyEncoder.groovy b/modules/nf-commons/src/test/nextflow/serde/MyEncoder.groovy new file mode 100644 index 0000000000..d223c1bee3 --- /dev/null +++ b/modules/nf-commons/src/test/nextflow/serde/MyEncoder.groovy @@ -0,0 +1,69 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.serde + + +import groovy.transform.CompileStatic +import groovy.transform.EqualsAndHashCode +import nextflow.serde.gson.GsonEncoder +import nextflow.serde.gson.RuntimeTypeAdapterFactory +/** + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class MyEncoder extends GsonEncoder { + + MyEncoder() { + withTypeAdapterFactory( + RuntimeTypeAdapterFactory.of(JsonSerializable.class, "@type") + .registerSubtype(Dog.class, "Dog") + .registerSubtype(Cat.class, "Cat") + ) + } + +} + +@EqualsAndHashCode +class Dog implements JsonSerializable { + private final String name; + int barkVolume; + + Dog(String name, int barkVolume) { + this.name = name; + this.barkVolume = barkVolume; + } + + String getName() { + return name; + } +} + +@EqualsAndHashCode +class Cat implements JsonSerializable { + private final String name; + boolean likesSun; + + Cat(String name, boolean likesSun) { + this.name = name; + this.likesSun = likesSun; + } + + String getName() { + return name; + } +} diff --git a/modules/nf-lineage/build.gradle b/modules/nf-lineage/build.gradle new file mode 100644 index 0000000000..4d7405bfc5 --- /dev/null +++ b/modules/nf-lineage/build.gradle @@ -0,0 +1,39 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +apply plugin: 'groovy' + +sourceSets { + main.java.srcDirs = [] + main.groovy.srcDirs = ['src/main'] + main.resources.srcDirs = ['src/resources'] + test.groovy.srcDirs = ['src/test'] + test.java.srcDirs = [] + test.resources.srcDirs = [] +} + +configurations { + // see https://docs.gradle.org/4.1/userguide/dependency_management.html#sub:exclude_transitive_dependencies + runtimeClasspath.exclude group: 'org.slf4j', module: 'slf4j-api' +} + +dependencies { + api project(':nextflow') + + testImplementation(testFixtures(project(":nextflow"))) + testImplementation "org.apache.groovy:groovy:4.0.26" + testImplementation "org.apache.groovy:groovy-nio:4.0.26" +} + diff --git a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinHistoryLog.groovy b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinHistoryLog.groovy new file mode 100644 index 0000000000..f7a306f616 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinHistoryLog.groovy @@ -0,0 +1,87 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.lineage + +import java.nio.file.Files +import java.nio.file.Path + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.extension.FilesEx +/** + * Folder-based history log: keeps one record file per workflow execution, named after the session ID, holding the run name and its LID. I/O failures are logged as warnings (including the cause message) rather than rethrown + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +class DefaultLinHistoryLog implements LinHistoryLog { + + Path path + + DefaultLinHistoryLog(Path folder) { + this.path = folder + if( !path.exists() ) + Files.createDirectories(path) + } + + void write(String name, UUID key, String runLid, Date date = null) { + assert key + def timestamp = date ?: new Date() + final recordFile = path.resolve(key.toString()) + try { + recordFile.text = new LinHistoryRecord(timestamp, name, key, runLid).toString() + log.trace("Record for $key written in lineage history log ${FilesEx.toUriString(this.path)}") + }catch (Throwable e) { + log.warn("Can't write record $key file ${FilesEx.toUriString(recordFile)} - cause: ${e.message}") + } + } + + void updateRunLid(UUID id, String runLid) { + assert id + final recordFile = path.resolve(id.toString()) + try { + def current = LinHistoryRecord.parse(path.resolve(id.toString()).text) + recordFile.text = new LinHistoryRecord(current.timestamp, current.runName, id, runLid).toString() + } + catch (Throwable e) { + log.warn("Can't read session $id file: ${FilesEx.toUriString(recordFile)} - cause: ${e.message}") + } + } + + List getRecords(){ + List list = new LinkedList() + try { + this.path.eachFile { Path file -> list.add(LinHistoryRecord.parse(file.text))} + } + catch (Throwable e) { + log.warn "Exception reading records from lineage history folder: ${FilesEx.toUriString(this.path)} - cause: ${e.message}" + } + return list.sort {it.timestamp } + } + + LinHistoryRecord getRecord(UUID id) { + assert id + final recordFile = path.resolve(id.toString()) + try { + return 
LinHistoryRecord.parse(recordFile.text) + } catch( Throwable e ) { + log.warn("Can't find session $id in file: ${FilesEx.toUriString(recordFile)} - cause: ${e.message}") + return null + } + } + +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy new file mode 100644 index 0000000000..fb64bbe2c7 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStore.groovy @@ -0,0 +1,148 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.lineage + +import java.nio.file.FileVisitResult +import java.nio.file.FileVisitor +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.attribute.BasicFileAttributes + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.lineage.serde.LinEncoder +import nextflow.lineage.serde.LinSerializable +import nextflow.lineage.config.LineageConfig +import nextflow.exception.AbortOperationException +import nextflow.file.FileHelper +import nextflow.util.TestOnly + +/** + * Default implementation of a lineage store. 
+ * + * @author Paolo Di Tommaso + */ +@Slf4j +@CompileStatic +class DefaultLinStore implements LinStore { + + private static String HISTORY_FILE_NAME = ".history" + private static final String METADATA_FILE = '.data.json' + private static final String METADATA_PATH = '.meta' + private static final String DEFAULT_LOCATION = 'lineage' + + private Path metaLocation + private Path location + private LinHistoryLog historyLog + private LinEncoder encoder + + DefaultLinStore open(LineageConfig config) { + location = toLocationPath(config.store.location) + metaLocation = location.resolve(METADATA_PATH) + encoder = new LinEncoder() + if( !Files.exists(metaLocation) && !Files.createDirectories(metaLocation) ) { + throw new AbortOperationException("Unable to create lineage store directory: $metaLocation") + } + historyLog = new DefaultLinHistoryLog(metaLocation.resolve(HISTORY_FILE_NAME)) + return this + } + + protected Path toLocationPath(String location) { + return location + ? FileHelper.toCanonicalPath(location) + : Path.of('.').toAbsolutePath().normalize().resolve(DEFAULT_LOCATION) + } + + @Override + void save(String key, LinSerializable value) { + final path = metaLocation.resolve("$key/$METADATA_FILE") + Files.createDirectories(path.parent) + log.debug "Save LID file path: $path" + path.text = encoder.encode(value) + } + + @Override + LinSerializable load(String key) { + final path = metaLocation.resolve("$key/$METADATA_FILE") + log.debug("Loading from path $path") + if (path.exists()) + return encoder.decode(path.text) as LinSerializable + log.debug("File for key $key not found") + return null + } + + Path getLocation(){ + return location + } + + @TestOnly + Path getMetadataPath() { + return metaLocation + } + + @Override + LinHistoryLog getHistoryLog(){ + return historyLog + } + + @Override + void close() throws IOException { } + + @Override + Map search(String queryString) { + def params = null + if (queryString) { + params = LinUtils.parseQuery(queryString) + } + 
return searchAllFiles(params) + } + + private Map searchAllFiles(Map params) { + final results = new HashMap() + + Files.walkFileTree(metaLocation, new FileVisitor() { + + @Override + FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { + FileVisitResult.CONTINUE + } + + @Override + FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + if (file.name.startsWith('.data.json') ) { + final lidObject = encoder.decode(file.text) + if (LinUtils.checkParams(lidObject, params)){ + results.put(metaLocation.relativize(file.getParent()).toString(), lidObject as LinSerializable) + } + } + FileVisitResult.CONTINUE + } + + @Override + FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + FileVisitResult.CONTINUE + } + + @Override + FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + FileVisitResult.CONTINUE + } + }) + + return results + } +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy new file mode 100644 index 0000000000..881442bdb8 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/DefaultLinStoreFactory.groovy @@ -0,0 +1,51 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nextflow.lineage + +import java.util.regex.Pattern + +import groovy.transform.CompileStatic +import nextflow.lineage.config.LineageConfig +import nextflow.plugin.Priority + +/** + * Default Factory for Lineage Store. + * + * @author Jorge Ejarque + */ +@CompileStatic +@Priority(0) +class DefaultLinStoreFactory extends LinStoreFactory { + + private static final Pattern SCHEME = ~/^([a-zA-Z][a-zA-Z\d+\-.]*):/ + private static final List SUPPORTED_SCHEMES = List.of('file', 's3', 'gs', 'az') + + @Override + boolean canOpen(LineageConfig config) { + final loc = config.store.location + if( !loc ) { + return true + } + final matcher = SCHEME.matcher(loc) + return matcher.find() ? matcher.group(1) in SUPPORTED_SCHEMES : true + } + + @Override + protected LinStore newInstance(LineageConfig config) { + return new DefaultLinStore() .open(config) + } + +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryLog.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryLog.groovy new file mode 100644 index 0000000000..d95a110c60 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryLog.groovy @@ -0,0 +1,55 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nextflow.lineage + +/** + * Interface to log workflow executions and their corresponding Lineage IDs + * + * @author Jorge Ejarque + */ +interface LinHistoryLog { + /** + * Write a workflow execution lineage history log record. + * + * @param name Workflow execution name. + * @param sessionId Workflow session ID. + * @param runLid Workflow run Lineage ID. + */ + void write(String name, UUID sessionId, String runLid) + + /** + * Updates the run LID for a given session ID. + * + * @param sessionId Workflow session ID. + * @param runLid Workflow run Lineage ID. + */ + void updateRunLid(UUID sessionId, String runLid) + + /** + * Get the records stored in the Lineage History Log. + * + * @return List of stored lineage history records. + */ + List getRecords() + + /** + * Get the record for a given session ID. + * @param sessionId Workflow session ID. + * @return LinHistoryRecord for the given ID. + */ + LinHistoryRecord getRecord(UUID sessionId) + +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy new file mode 100644 index 0000000000..366209215d --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinHistoryRecord.groovy @@ -0,0 +1,70 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nextflow.lineage + +import groovy.transform.CompileStatic +import groovy.transform.EqualsAndHashCode + +import java.text.DateFormat +import java.text.SimpleDateFormat + +/** + * Record of a workflow execution and its corresponding Lineage ID, rendered by toString() as one tab-separated line of four columns (timestamp, run name, session ID, run LID) and read back by parse(). Equality considers runName and sessionId only + * NOTE(review): toList() writes '-' for a missing timestamp but parse() feeds that column straight to TIMESTAMP_FMT.parse(); TIMESTAMP_FMT is a shared SimpleDateFormat, which the JDK documents as not synchronized - confirm callers are single-threaded + * @author Jorge Ejarque + */ +@CompileStatic +@EqualsAndHashCode(includes = 'runName,sessionId') +class LinHistoryRecord { + + static final DateFormat TIMESTAMP_FMT = new SimpleDateFormat('yyyy-MM-dd HH:mm:ss') + + final Date timestamp + final String runName + final UUID sessionId + final String runLid + + LinHistoryRecord(Date timestamp, String name, UUID sessionId, String runLid) { + this.timestamp = timestamp + this.runName = name + this.sessionId = sessionId + this.runLid = runLid + } + + protected LinHistoryRecord() {} + + List toList() { + return List.of( + timestamp ? TIMESTAMP_FMT.format(timestamp) : '-', + runName ?: '-', + sessionId.toString(), + runLid ?: '-', + ) + } + + @Override + String toString() { + toList().join('\t') + } + + static LinHistoryRecord parse(String line) { + final cols = line.tokenize('\t') + if (cols.size() == 4) { + return new LinHistoryRecord(TIMESTAMP_FMT.parse(cols[0]), cols[1], UUID.fromString(cols[2]), cols[3]) + } + throw new IllegalArgumentException("Not a valid history entry: `$line`") + } +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy new file mode 100644 index 0000000000..081a5bba7a --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinObserver.groovy @@ -0,0 +1,485 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.lineage + +import static nextflow.lineage.fs.LinPath.* + +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.attribute.BasicFileAttributes +import java.time.OffsetDateTime + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.Session +import nextflow.lineage.model.Annotation +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.FileOutput +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.TaskOutput +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowOutput +import nextflow.lineage.model.WorkflowRun +import nextflow.file.FileHelper +import nextflow.file.FileHolder +import nextflow.processor.TaskHandler +import nextflow.processor.TaskRun +import nextflow.script.ScriptMeta +import nextflow.script.params.BaseParam +import nextflow.script.params.CmdEvalParam +import nextflow.script.params.DefaultInParam +import nextflow.script.params.EachInParam +import nextflow.script.params.EnvInParam +import nextflow.script.params.EnvOutParam +import nextflow.script.params.FileInParam +import nextflow.script.params.FileOutParam +import nextflow.script.params.InParam +import nextflow.script.params.OutParam +import nextflow.script.params.StdInParam +import nextflow.script.params.StdOutParam +import nextflow.script.params.ValueInParam +import nextflow.script.params.ValueOutParam +import nextflow.trace.TraceObserver +import nextflow.trace.TraceRecord +import 
nextflow.util.CacheHelper +import nextflow.util.PathNormalizer +import nextflow.util.SecretHelper +import nextflow.util.TestOnly + +/** + * Observer to write the generated workflow metadata in a lineage store. + * + * @author Paolo Di Tommaso + */ +@Slf4j +@CompileStatic +class LinObserver implements TraceObserver { + private static Map, String> taskParamToValue = [ + (StdOutParam) : "stdout", + (StdInParam) : "stdin", + (FileInParam) : "path", + (FileOutParam) : "path", + (ValueInParam) : "val", + (ValueOutParam): "val", + (EnvInParam) : "env", + (EnvOutParam) : "env", + (CmdEvalParam) : "eval", + (EachInParam) : "each" + ] + + private String executionHash + private LinStore store + private Session session + private WorkflowOutput workflowOutput + private Map outputsStoreDirLid = new HashMap(10) + private PathNormalizer normalizer + + LinObserver(Session session, LinStore store){ + this.session = session + this.store = store + } + + @Override + void onFlowCreate(Session session) { + this.store.getHistoryLog().write(session.runName, session.uniqueId, '-') + } + + @TestOnly + String getExecutionHash(){ executionHash } + + @TestOnly + String setExecutionHash(String hash){ this.executionHash = hash } + + @TestOnly + String setNormalizer(PathNormalizer normalizer){ this.normalizer = normalizer } + + @Override + void onFlowBegin() { + normalizer = new PathNormalizer(session.workflowMetadata) + executionHash = storeWorkflowRun(normalizer) + final executionUri = asUriString(executionHash) + workflowOutput = new WorkflowOutput( + OffsetDateTime.now(), + executionUri, + new LinkedList() + ) + this.store.getHistoryLog().updateRunLid(session.uniqueId, executionUri) + } + + @Override + void onFlowComplete(){ + if (this.workflowOutput){ + workflowOutput.createdAt = OffsetDateTime.now() + final key = executionHash + '#output' + this.store.save(key, workflowOutput) + } + } + + protected Collection allScriptFiles() { + return ScriptMeta.allScriptNames().values() + } + + protected 
List collectScriptDataPaths(PathNormalizer normalizer) { + final allScripts = allScriptFiles() + final result = new ArrayList(allScripts.size()+1) + // the main script + result.add( new DataPath( + normalizer.normalizePath(session.workflowMetadata.scriptFile.normalize()), + Checksum.of(session.workflowMetadata.scriptId, "nextflow", CacheHelper.HashMode.DEFAULT()) + ) ) + + // all other scripts + for (Path it: allScripts) { + if( it==null || it == session.workflowMetadata.scriptFile ) + continue + final dataPath = new DataPath(normalizer.normalizePath(it.normalize()), Checksum.ofNextflow(it.text)) + result.add(dataPath) + } + return result + } + + protected String storeWorkflowRun(PathNormalizer normalizer) { + // create the workflow object holding script files and repo tracking info + final workflow = new Workflow( + collectScriptDataPaths(normalizer), + session.workflowMetadata.repository, + session.workflowMetadata.commitId + ) + // create the workflow run main object + final value = new WorkflowRun( + workflow, + session.uniqueId.toString(), + session.runName, + getNormalizedParams(session.params, normalizer), + SecretHelper.hideSecrets(session.config.deepClone()) as Map + ) + final executionHash = CacheHelper.hasher(value).hash().toString() + store.save(executionHash, value) + return executionHash + } + + protected static List getNormalizedParams(Map params, PathNormalizer normalizer){ + final normalizedParams = new LinkedList() + params.each{ String key, Object value -> + normalizedParams.add( new Parameter( getParameterType(value), key, normalizeValue(value, normalizer) ) ) + } + return normalizedParams + } + + @Override + void onProcessComplete(TaskHandler handler, TraceRecord trace) { + storeTaskInfo(handler.task) + } + + protected void storeTaskInfo(TaskRun task) { + // store the task run entry + storeTaskRun(task, normalizer) + // store all task results + storeTaskResults(task, normalizer) + } + + protected String storeTaskResults(TaskRun task, 
PathNormalizer normalizer){ + final outputParams = getNormalizedTaskOutputs(task, normalizer) + final value = new TaskOutput( asUriString(task.hash.toString()), asUriString(executionHash), OffsetDateTime.now(), outputParams ) + final key = task.hash.toString() + '#output' + store.save(key,value) + return key + } + + private List getNormalizedTaskOutputs( TaskRun task, PathNormalizer normalizer){ + final outputs = task.getOutputs() + final outputParams = new LinkedList() + outputs.forEach { OutParam key, Object value -> + manageTaskOutputParameter(key, outputParams, value, task, normalizer) + } + return outputParams + } + + private void manageTaskOutputParameter(OutParam key, LinkedList outputParams, value, TaskRun task, PathNormalizer normalizer) { + if (key instanceof FileOutParam) { + outputParams.add(new Parameter(getParameterType(key), key.name, manageFileOutParam(value, task))) + } else { + outputParams.add(new Parameter(getParameterType(key), key.name, normalizeValue(value, normalizer))) + } + } + + private static Object normalizeValue(Object value, PathNormalizer normalizer) { + if (value instanceof Path) + return normalizer.normalizePath(value as Path) + else if (value instanceof CharSequence) + return normalizer.normalizePath(value.toString()) + else + return value + } + + private Object manageFileOutParam(Object value, TaskRun task) { + if (value == null) { + throw new IllegalArgumentException("Unexpected output null for task '${task.name}'") + } + if (value instanceof Path) { + return asUriString(storeTaskOutput(task, (Path) value)) + } + if (value instanceof Collection) { + final files = new LinkedList() + for (Path it : value) { + files.add( asUriString(storeTaskOutput(task, (Path)it)) ) + } + return files + } + // unexpected task output + throw new IllegalArgumentException("Unexpected output [${value.getClass().getName()}] '${value}' for task '${task.name}'") + } + + protected String storeTaskRun(TaskRun task, PathNormalizer normalizer) { + final 
codeChecksum = Checksum.ofNextflow(session.stubRun ? task.stubSource : task.source) + final value = new nextflow.lineage.model.TaskRun( + session.uniqueId.toString(), + task.getName(), + codeChecksum, + task.script, + task.inputs ? manageTaskInputParameters(task.inputs, normalizer) : null, + task.isContainerEnabled() ? task.getContainerFingerprint() : null, + normalizer.normalizePath(task.getCondaEnv()), + normalizer.normalizePath(task.getSpackEnv()), + task.config?.getArchitecture()?.toString(), + task.processor.getTaskGlobalVars(task), + task.processor.getTaskBinEntries(task.source).collect { Path p -> new DataPath( + normalizer.normalizePath(p.normalize()), + Checksum.ofNextflow(p) ) + }, + asUriString(executionHash) + ) + + // store in the underlying persistence + final key = task.hash.toString() + store.save(key, value) + return key + } + + protected String storeTaskOutput(TaskRun task, Path path) { + try { + final attrs = readAttributes(path) + final key = getTaskOutputKey(task, path) + final checksum = Checksum.ofNextflow(path) + final value = new FileOutput( + path.toUriString(), + checksum, + asUriString(task.hash.toString()), + asUriString(executionHash), + asUriString(task.hash.toString()), + attrs.size(), + LinUtils.toDate(attrs?.creationTime()), + LinUtils.toDate(attrs?.lastModifiedTime())) + store.save(key, value) + return key + } catch (Throwable e) { + log.warn("Unexpected error storing lineage output '${path.toUriString()}' for task '${task.name}'", e) + return path.toUriString() + } + } + + protected String getTaskOutputKey(TaskRun task, Path path) { + final rel = getTaskRelative(task, path) + return task.hash.toString() + SEPARATOR + rel + } + + protected String getWorkflowOutputKey(Path destination) { + final rel = getWorkflowRelative(destination) + return executionHash + SEPARATOR + rel + } + + protected String getTaskRelative(TaskRun task, Path path){ + if (path.isAbsolute()) { + final rel = getTaskRelative0(task, path) + if (rel) + return rel 
+ throw new IllegalArgumentException("Cannot access the relative path for output '${path.toUriString()}' and task '${task.name}'") + } + //Check if contains workdir or storeDir + final rel = getTaskRelative0(task, path.toAbsolutePath()) + if (rel) return rel + if (path.normalize().getName(0).toString() == "..") + throw new IllegalArgumentException("Cannot access the relative path for output '${path.toUriString()}' and task '${task.name}'" ) + return path.normalize().toString() + } + + private String getTaskRelative0(TaskRun task, Path path){ + final workDirAbsolute = task.workDir.toAbsolutePath() + if (path.startsWith(workDirAbsolute)) { + return workDirAbsolute.relativize(path).toString() + } + //If task output is not in the workDir check if output is stored in the task's storeDir (null-safe: the guard below expects storeDir may be absent) + final storeDir = task.getConfig().getStoreDir()?.toAbsolutePath() + if( storeDir && path.startsWith(storeDir) ) { + final rel = storeDir.relativize(path) + //If output stored in storeDir, keep the path in case it is used as workflow output + this.outputsStoreDirLid.put(path.toString(), asUriString(task.hash.toString(),rel.toString())) + return rel.toString() + } + return null + } + + protected BasicFileAttributes readAttributes(Path path) { + return Files.readAttributes(path, BasicFileAttributes) + } + + @Override + void onFilePublish(Path destination, Path source) { + storePublishedFile(destination, source) + } + + protected void storePublishedFile(Path destination, Path source = null, Map annotations = null){ + try { + final checksum = Checksum.ofNextflow(destination) + final key = getWorkflowOutputKey(destination) + final sourceReference = source ? 
getSourceReference(source) : asUriString(executionHash) + final attrs = readAttributes(destination) + final value = new FileOutput( + destination.toUriString(), + checksum, + sourceReference, + asUriString(executionHash), + null, + attrs.size(), + LinUtils.toDate(attrs?.creationTime()), + LinUtils.toDate(attrs?.lastModifiedTime()), + convertAnnotations(annotations)) + store.save(key, value) + } catch (Throwable e) { + log.warn("Unexpected error storing published file '${destination.toUriString()}' for workflow '${executionHash}'", e) + } + } + + private static List convertAnnotations(Map annotations){ + if( !annotations ) + return null + final converted = new LinkedList() + annotations.forEach { Object key, Object value -> converted.add(new Annotation(key.toString(), value)) } + return converted + } + + String getSourceReference(Path source){ + final hash = FileHelper.getTaskHashFromPath(source, session.workDir) + if (hash) { + final target = FileHelper.getWorkFolder(session.workDir, hash).relativize(source).toString() + return asUriString(hash.toString(), target) + } + final storeDirReference = outputsStoreDirLid.get(source.toString()) + return storeDirReference ? 
asUriString(storeDirReference) : null + } + + @Override + void onFilePublish(Path destination){ + storePublishedFile (destination) + } + + @Override + void onWorkflowPublish(String name, Object value){ + workflowOutput.output.add(new Parameter(getParameterType(value), name, convertPathsToLidReferences(value))) + } + + protected static String getParameterType(Object param) { + if( param instanceof BaseParam ) + return taskParamToValue.get(param.class) + // return generic types + if( param instanceof Path ) + return Path.simpleName + if (param instanceof CharSequence) + return String.simpleName + if( param instanceof Collection ) + return Collection.simpleName + if( param instanceof Map) + return Map.simpleName + return param.class.simpleName + } + + private Object convertPathsToLidReferences(Object value){ + if( value instanceof Path ) { + try { + final key = getWorkflowOutputKey(value) + return asUriString(key) + } catch (Throwable e){ + //Workflow output key not found + return value + } + } + + if( value instanceof Collection ) { + return value.collect { el -> convertPathsToLidReferences(el) } + } + + if( value instanceof Map ) { + return value + .findAll { k, v -> v != null } + .collectEntries { k, v -> Map.entry(k, convertPathsToLidReferences(v)) } + } + return value + } + + @Override + void onFilePublish(Path destination, Path source, Map annotations){ + storePublishedFile( destination, source, annotations) + } + + /** + * Relativizes a path from the workflow's output dir. 
+ * + * @param path Path to relativize + * @return Path String with the relative path + * @throws IllegalArgumentException + */ + protected String getWorkflowRelative(Path path) throws IllegalArgumentException{ + final outputDirAbs = session.outputDir.toAbsolutePath() + if (path.isAbsolute()) { + if (path.startsWith(outputDirAbs)) { + return outputDirAbs.relativize(path).toString() + } + throw new IllegalArgumentException("Cannot access relative path for workflow output '${path.toUriString()}'") + } + final pathAbs = path.toAbsolutePath() + if (pathAbs.startsWith(outputDirAbs)) { + return outputDirAbs.relativize(pathAbs).toString() + } + if (path.normalize().getName(0).toString() == "..") + throw new IllegalArgumentException("Cannot access relative path for workflow output '${path.toUriString()}'") + return path.normalize().toString() + } + + protected List manageTaskInputParameters(Map inputs, PathNormalizer normalizer) { + List managedInputs = new LinkedList() + inputs.forEach { param, value -> + if( param instanceof FileInParam ) + managedInputs.add( new Parameter( getParameterType(param), param.name, manageFileInParam( (List)value , normalizer) ) ) + else if( !(param instanceof DefaultInParam) ) + managedInputs.add( new Parameter( getParameterType(param), param.name, value) ) + } + return managedInputs + } + + private List manageFileInParam(List files, PathNormalizer normalizer){ + final paths = new LinkedList(); + for( FileHolder it : files ) { + final ref = getSourceReference(it.storePath) + paths.add(ref ?: new DataPath( + normalizer.normalizePath(it.storePath), + Checksum.ofNextflow(it.storePath)) + ) + } + return paths + } +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinObserverFactory.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinObserverFactory.groovy new file mode 100644 index 0000000000..3044389d73 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/LinObserverFactory.groovy @@ -0,0 +1,41 @@ +/* + * Copyright 
2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage
+
+import groovy.transform.CompileStatic
+import nextflow.Session
+import nextflow.trace.TraceObserver
+import nextflow.trace.TraceObserverFactory
+
+/**
+ * Trace observer factory that contributes a {@link LinObserver} when a lineage store is available
+ *
+ * @author Paolo Di Tommaso
+ */
+@CompileStatic
+class LinObserverFactory implements TraceObserverFactory {
+
+    /**
+     * Creates the lineage observers for the given session.
+     *
+     * @param session The current session
+     * @return A collection holding a single {@link LinObserver} when
+     *      {@link LinStoreFactory#getOrCreate} provides a store, an empty collection otherwise
+     */
+    @Override
+    Collection create(Session session) {
+        final lineageStore = LinStoreFactory.getOrCreate(session)
+        final observers = new ArrayList(1)
+        if( !lineageStore )
+            return observers
+        observers.add( new LinObserver(session, lineageStore) )
+        return observers
+    }
+
+}
diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy
new file mode 100644
index 0000000000..a89f7fc692
--- /dev/null
+++ b/modules/nf-lineage/src/main/nextflow/lineage/LinPropertyValidator.groovy
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage
+
+import groovy.transform.CompileStatic
+import nextflow.lineage.model.Annotation
+import nextflow.lineage.model.Checksum
+import nextflow.lineage.model.DataPath
+import nextflow.lineage.model.FileOutput
+import nextflow.lineage.model.Parameter
+import nextflow.lineage.model.TaskOutput
+import nextflow.lineage.model.TaskRun
+import nextflow.lineage.model.Workflow
+import nextflow.lineage.model.WorkflowOutput
+import nextflow.lineage.model.WorkflowRun
+
+/**
+ * Class to validate if the string refers to a property in the classes of the Lineage Metadata model.
+ *
+ * @author Jorge Ejarque
+ */
+@CompileStatic
+class LinPropertyValidator {
+
+    // Model classes whose property names are accepted by the validator.
+    // The file output entry is `FileOutput`: an unqualified `DataOutput` is not a lineage model
+    // class and is resolved by Groovy's default `java.io.*` import to `java.io.DataOutput`.
+    private static final List LIN_MODEL_CLASSES = [
+        Annotation,
+        Checksum,
+        FileOutput,
+        DataPath,
+        Parameter,
+        TaskOutput,
+        TaskRun,
+        Workflow,
+        WorkflowOutput,
+        WorkflowRun,
+    ]
+
+    // Names of all the properties exposed by the model classes
+    private Set validProperties
+
+    /**
+     * Creates the validator collecting the property names of all the lineage model classes.
+     */
+    LinPropertyValidator() {
+        this.validProperties = new HashSet()
+        for( Class clazz : LIN_MODEL_CLASSES ) {
+            for( MetaProperty field : clazz.metaClass.getProperties() ) {
+                validProperties.add( field.name)
+            }
+        }
+    }
+
+    /**
+     * Checks that every given name is a property of the lineage model.
+     *
+     * @param properties Property names to check
+     * @throws IllegalArgumentException on the first unknown property; the message lists the
+     *      closest known property names when there are any
+     */
+    void validate(Collection properties) {
+        for( String property : properties ) {
+            if( property !in this.validProperties ) {
+                def msg = "Property '$property' doesn't exist in the lineage model."
+                final matches = this.validProperties.closest(property)
+                if( matches )
+                    msg += " -- Did you mean one of these?" + matches.collect { " $it"}.join(', ')
+                throw new IllegalArgumentException(msg)
+            }
+        }
+    }
+
+    /**
+     * Checks the keys of a query parameters map. Each key is split on '.' and every
+     * resulting segment must be a property of the lineage model.
+     *
+     * @param params Query parameter-value pairs
+     * @throws IllegalArgumentException when a key segment is not a known property
+     */
+    void validateQueryParams(Map params) {
+        for( String key : params.keySet() ) {
+            validate(key.tokenize('.'))
+        }
+    }
+
+}
diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy
new file mode 100644
index 0000000000..3f826b7a0a
--- /dev/null
+++ b/modules/nf-lineage/src/main/nextflow/lineage/LinStore.groovy
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage
+
+import groovy.transform.CompileStatic
+import nextflow.lineage.serde.LinSerializable
+import nextflow.lineage.config.LineageConfig
+/**
+ * Interface for the lineage store
+ *
+ * @author Paolo Di Tommaso
+ */
+@CompileStatic
+interface LinStore extends Closeable {
+
+    /**
+     * Open the lineage store.
+     * @param config Configuration to open the lineage store.
+     */
+    LinStore open(LineageConfig config)
+
+    /**
+     * Save a lineage entry in the store for a given key.
+     * @param key Entry key.
+     * @param value Entry object.
+     */
+    void save(String key, LinSerializable value)
+
+    /**
+     * Load an entry for a given Lineage ID key.
+     * @param key LID key.
+     * @return entry value, or null if the key does not exist
+     */
+    LinSerializable load(String key)
+
+    /**
+     * Get the {@link LinHistoryLog} object associated to the lineage store.
+     * @return {@link LinHistoryLog} object
+     */
+    LinHistoryLog getHistoryLog()
+
+    /**
+     * Search for lineage entries.
+     * @param queryString JSON-path like query string. Only simple and nested field operators are supported (no arrays, wildcards, etc.)
+     * @return Key-lineage entry pairs fulfilling the queryString
+     */
+    Map search(String queryString)
+
+}
diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinStoreFactory.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinStoreFactory.groovy
new file mode 100644
index 0000000000..b771766823
--- /dev/null
+++ b/modules/nf-lineage/src/main/nextflow/lineage/LinStoreFactory.groovy
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import nextflow.Session
+import nextflow.lineage.config.LineageConfig
+import nextflow.plugin.Plugins
+import nextflow.util.TestOnly
+import org.pf4j.ExtensionPoint
+
+/**
+ * Factory for {@link LinStore} objects
+ *
+ * @author Jorge Ejarque
+ */
+@Slf4j
+@CompileStatic
+abstract class LinStoreFactory implements ExtensionPoint {
+
+    // Both fields are read outside the lock in `getOrCreate`, therefore they must be
+    // volatile to guarantee that the values written by another thread are visible
+    private static volatile LinStore instance
+
+    private static volatile boolean initialized
+
+    /**
+     * @param config Lineage configuration
+     * @return {@code true} when this factory is able to open a store for the given configuration
+     */
+    protected abstract boolean canOpen(LineageConfig config)
+
+    /**
+     * @param config Lineage configuration
+     * @return A new store instance for the given configuration
+     */
+    protected abstract LinStore newInstance(LineageConfig config)
+
+    /**
+     * Creates a store using the first factory extension, in priority order, that can open
+     * the given configuration.
+     *
+     * @param config Lineage configuration
+     * @return The new store instance
+     * @throws IllegalStateException when no factory extension can open the configuration
+     */
+    static LinStore create(LineageConfig config){
+        final factory = Plugins
+            .getPriorityExtensions(LinStoreFactory)
+            .find( f-> f.canOpen(config))
+        if( !factory )
+            throw new IllegalStateException("Unable to find Nextflow Lineage store factory")
+        log.debug "Using Nextflow Lineage store factory: ${factory.getClass().getName()}"
+        return factory.newInstance(config)
+    }
+
+    /**
+     * Returns the shared store instance, creating it on the first invocation.
+     *
+     * The creation is attempted only once: later invocations return the same result, which is
+     * {@code null} when lineage is not enabled in the configuration or when the first attempt failed.
+     *
+     * @param session Session providing the lineage configuration
+     * @return The shared store instance, or {@code null} when it is not available
+     */
+    static LinStore getOrCreate(Session session) {
+        if( instance || initialized )
+            return instance
+        synchronized (LinStoreFactory.class) {
+            if( instance || initialized )
+                return instance
+            try {
+                final config = LineageConfig.create(session)
+                if( !config.enabled )
+                    return null
+                return instance = create(config)
+            }
+            finally {
+                // mark as initialized only once the attempt is complete, otherwise a concurrent
+                // caller on the unlocked path above would get null while the store is being created
+                initialized = true
+            }
+        }
+    }
+
+    /**
+     * Clears the shared store instance so that the next {@link #getOrCreate} creates it again.
+     */
+    @TestOnly
+    static void reset(){
+        synchronized (LinStoreFactory.class) {
+            instance = null
+            initialized = false
+        }
+    }
+
+}
diff --git a/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy
new file mode 100644
index 0000000000..dfb5a4e634
--- /dev/null
+++ b/modules/nf-lineage/src/main/nextflow/lineage/LinUtils.groovy
@@ -0,0 +1,330 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage
+
+import java.nio.file.attribute.FileTime
+import java.time.OffsetDateTime
+import java.time.ZoneId
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import nextflow.lineage.fs.LinPath
+import nextflow.lineage.model.TaskRun
+import nextflow.lineage.model.WorkflowRun
+import nextflow.lineage.serde.LinEncoder
+import nextflow.lineage.serde.LinSerializable
+import nextflow.serde.gson.GsonEncoder
+/**
+ * Utils class for Lineage IDs.
+ *
+ * @author Jorge Ejarque
+ */
+@Slf4j
+@CompileStatic
+class LinUtils {
+
+    private static final String[] EMPTY_ARRAY = new String[] {}
+
+    /**
+     * Query a lineage store.
+     *
+     * @param store lineage store to query.
+     * @param uri Query to perform in a URI-like format.
+     *      Format {@code lid://<key>[?QueryString][#fragment]} where:
+     *       - Key: Element where the query will be applied. '/' indicates query will be applied in all the elements of the lineage store.
+     *       - QueryString: all param-value pairs that the lineage element should fulfill in a URI's query string format.
+     *       - Fragment: Element fragment to retrieve.
+     * @return Collection of object fulfilling the query
+     */
+    static Collection query(LinStore store, URI uri) {
+        String key = uri.authority ? uri.authority + uri.path : uri.path
+        if (key == LinPath.SEPARATOR) {
+            return globalSearch(store, uri)
+        } else {
+            final parameters = uri.query ? parseQuery(uri.query) : null
+            final children = parseChildrenFromFragment(uri.fragment)
+            return searchPath(store, key, parameters, children )
+        }
+    }
+
+    /**
+     * Searches the whole store with the URI query string and, when a fragment is given,
+     * maps each result to the property addressed by the fragment.
+     */
+    private static Collection globalSearch(LinStore store, URI uri) {
+        final results = store.search(uri.query).values()
+        if (results && uri.fragment) {
+            // If fragment is defined get the property of the object indicated by the fragment
+            return filterResults(results, uri.fragment)
+        }
+        return results
+    }
+
+    /**
+     * Navigates each result to the given fragment path, keeping only the non-empty outcomes.
+     */
+    private static List filterResults(Collection results, String fragment) {
+        final filteredResults = []
+        results.forEach {
+            final output = navigate(it, fragment)
+            if (output) {
+                filteredResults.add(output)
+            }
+        }
+        return filteredResults
+    }
+
+    /**
+     * Get the array of the search path children elements from the fragment string
+     *
+     * @param fragment String containing the elements separated by '.'
+     * @return array with the parsed element
+     * @throws IllegalArgumentException when an element is not a property of the lineage model
+     */
+    static String[] parseChildrenFromFragment(String fragment) {
+        if( !fragment )
+            return EMPTY_ARRAY
+        final children = fragment.tokenize('.')
+        new LinPropertyValidator().validate(children)
+        return children as String[]
+    }
+
+    /**
+     * Search for objects inside a description
+     *
+     * @param store lineage store
+     * @param key lineage key where to perform the search
+     * @param params Parameter-value pairs to be evaluated in the key
+     * @param children Sub-objects to evaluate and retrieve
+     * @return List of object
+     * @throws FileNotFoundException when the key, or the requested sub-object, does not exist
+     */
+    protected static List searchPath(LinStore store, String key, Map params, String[] children = []) {
+        final object = store.load(key)
+        if (!object) {
+            throw new FileNotFoundException("Lineage object $key not found")
+        }
+        final results = new LinkedList()
+        if (children && children.size() > 0) {
+            treatSubObject(store, key, object, children, params, results)
+        } else {
+            treatObject(object, params, results)
+        }
+
+        return results
+    }
+
+    /**
+     * Resolves the sub-object addressed by `children` and evaluates it against the params.
+     */
+    private static void treatSubObject(LinStore store, String key, LinSerializable object, String[] children, Map params, LinkedList results) {
+        final output = getSubObject(store, key, object, children)
+        if (!output) {
+            throw new FileNotFoundException("Lineage object $key#${children.join('.')} not found")
+        }
+        treatObject(output, params, results)
+    }
+
+    /**
+     * Get a metadata sub-object.
+     *
+     * If the requested sub-object is the workflow or task outputs, retrieves the outputs from the outputs description.
+     *
+     * @param store Store to retrieve lineage metadata objects.
+     * @param key Parent metadata key.
+     * @param object Parent object.
+     * @param children Array of strings indicating the properties to navigate to get the sub-object.
+     * @return Sub-object or null if it does not exist.
+     */
+    static Object getSubObject(LinStore store, String key, LinSerializable object, String[] children) {
+        if( isSearchingOutputs(object, children) ) {
+            // When asking for a Workflow or task output retrieve the outputs description
+            final outputs = store.load("${key}#output")
+            if (!outputs)
+                return null
+            return navigate(outputs, children.join('.'))
+        }
+        return navigate(object, children.join('.'))
+    }
+
+    /**
+     * Check if the Lid pseudo path or query is for Task or Workflow outputs.
+     *
+     * @param object Parent Lid metadata object
+     * @param children Array of strings indicating the properties to navigate to get the sub-object.
+     * @return return 'true' if the parent is a Task/Workflow run and the first element in children is 'output'. Otherwise 'false'
+     */
+    static boolean isSearchingOutputs(LinSerializable object, String[] children) {
+        return (object instanceof WorkflowRun || object instanceof TaskRun) && children && children[0] == 'output'
+    }
+
+    /**
+     * Evaluates whether an object, or the objects in a collection, match a set of parameter-value pairs,
+     * adding the matching ones to the results collection. When no params are given the object is added as is.
+     *
+     * @param object Object or collection of objects to evaluate
+     * @param params parameter-value pairs to evaluate in each object
+     * @param results results collection to include the matching objects
+     */
+    protected static void treatObject(def object, Map params, List results) {
+        if (params) {
+            if (object instanceof Collection) {
+                (object as Collection).forEach { treatObject(it, params, results) }
+            } else if (checkParams(object, params)) {
+                results.add(object)
+            }
+        } else {
+            results.add(object)
+        }
+    }
+
+    /**
+     * Parses a query string and stores its content in a parameter-value Map.
+     *
+     * Each pair is split on the first '=' only, so values may themselves contain '='.
+     * A parameter without a value (e.g. 'param' or 'param=') is mapped to an empty string.
+     *
+     * @param queryString URI-like query string. (e.g. param1=value1&param2=value2).
+     * @return Map containing the parameter-value pairs of the query string.
+     * @throws IllegalArgumentException when a parameter name is not a property of the lineage model
+     */
+    static Map parseQuery(String queryString) {
+        if( !queryString ) {
+            return [:]
+        }
+        final params = new LinkedHashMap()
+        for( String pair : queryString.split('&') ) {
+            // limit the split to two tokens to keep any further '=' as part of the value
+            final String[] tokens = pair.split('=', 2)
+            final name = URLDecoder.decode(tokens[0], 'UTF-8')
+            final value = tokens.length > 1 ? URLDecoder.decode(tokens[1], 'UTF-8') : ''
+            params.put(name, value)
+        }
+        new LinPropertyValidator().validateQueryParams(params)
+        return params
+    }
+
+    /**
+     * Check if an object fulfills the parameter-value pairs
+     *
+     * @param object Object to evaluate
+     * @param params parameter-value pairs to evaluate
+     * @return true if all object parameters exist and match the value, otherwise false.
+     */
+    static boolean checkParams(Object object, Map params) {
+        for( final entry : params.entrySet() ) {
+            final value = navigate(object, entry.key)
+            if( !checkParam(value, entry.value) ) {
+                return false
+            }
+        }
+        return true
+    }
+
+    /**
+     * Compares a value, or any element of a collection value, with the expected one using
+     * their string representation.
+     */
+    private static boolean checkParam(Object value, Object expected) {
+        // only a missing value is a mismatch: values such as 0, false or an empty string
+        // are legitimate and must be compared with the expected one
+        if( value == null )
+            return false
+        if( value instanceof Collection ) {
+            for( final v : value as Collection ) {
+                if( v.toString() == expected.toString() )
+                    return true
+            }
+            return false
+        }
+        return value.toString() == expected.toString()
+    }
+
+    /**
+     * Retrieves the sub-object or value indicated by a path.
+     *
+     * @param obj Object to navigate
+     * @param path Elements path separated by '.' e.g. field.subfield
+     * @return sub-object / value, or null when the object is empty or the path cannot be navigated
+     */
+    static Object navigate(Object obj, String path) {
+        if (!obj)
+            return null
+        // type has been replaced by class when evaluating LidSerializable objects
+        if (obj instanceof LinSerializable && path == 'type')
+            return obj.getClass()?.simpleName
+        try {
+            return path.tokenize('.').inject(obj) { current, key ->
+                getSubPath(current, key)
+            }
+        }
+        catch (Throwable e) {
+            log.debug("Error navigating to $path in object", e)
+            return null
+        }
+    }
+
+    /**
+     * Resolves a single path element on a map, a collection or an object property.
+     */
+    private static Object getSubPath(current, String key) {
+        if (current == null) {
+            return null
+        }
+        if (current instanceof Map) {
+            return current[key] // Navigate Map properties
+        }
+        if (current instanceof Collection) {
+            return navigateCollection(current, key)
+        }
+        if (current.metaClass.hasProperty(current, key)) {
+            return current.getAt(key) // Navigate Object properties
+        }
+        log.debug("No property found for $key")
+        return null
+    }
+
+    /**
+     * Resolves a path element on every item of a collection, gathering the non-empty outcomes.
+     */
+    private static Object navigateCollection(Collection collection, String key) {
+        final results = []
+        for (Object object : collection) {
+            final res = getSubPath(object, key)
+            if (res)
+                results.add(res)
+        }
+        if (results.isEmpty() ) {
+            log.trace("No property found for $key")
+            return null
+        }
+        // Return a single object if only one result is found.
+        return results.size() == 1 ? results[0] : results
+    }
+
+    /**
+     * Helper function to convert from FileTime to ISO 8601 with offset
+     * of current timezone.
+     *
+     * @param time File time to convert
+     * @return The {@link OffsetDateTime} for the corresponding file time or null in case of not available (null)
+     */
+    static OffsetDateTime toDate(FileTime time) {
+        return time != null
+            ? time.toInstant().atZone(ZoneId.systemDefault()).toOffsetDateTime()
+            : null
+    }
+
+    /**
+     * Helper function to convert from {@link OffsetDateTime} to FileTime.
+     *
+     * @param date Date and time to convert
+     * @return Converted FileTime or null if date is not available (null)
+     */
+    static FileTime toFileTime(OffsetDateTime date) {
+        if (!date)
+            return null
+        return FileTime.from(date.toInstant())
+    }
+
+    /**
+     * Helper function to unify the encoding of outputs when querying and navigating the lineage pseudoFS.
+     * Outputs can include LinSerializable objects, collections or parts of these objects.
+     * LinSerializable objects can be encoded with the LinEncoder, but collections or parts of
+     * these objects require to extend the GsonEncoder.
+     *
+     * @param output Output to encode
+     * @param prettyPrint Whether the JSON should be pretty printed
+     * @return Output encoded as a JSON string
+     */
+    static String encodeSearchOutputs(Object output, boolean prettyPrint) {
+        if (output instanceof LinSerializable) {
+            return new LinEncoder().withPrettyPrint(prettyPrint).encode(output)
+        } else {
+            return new GsonEncoder() {}
+                .withPrettyPrint(prettyPrint)
+                .withSerializeNulls(true)
+                .withTypeAdapterFactory(LinEncoder.newLidTypeAdapterFactory())
+                .encode(output)
+        }
+    }
+}
diff --git a/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy
new file mode 100644
index 0000000000..43290e46ff
--- /dev/null
+++ b/modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy
@@ -0,0 +1,327 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage.cli
+
+import static nextflow.lineage.fs.LinPath.*
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Path
+
+import groovy.transform.Canonical
+import groovy.transform.CompileStatic
+import nextflow.Session
+import nextflow.cli.CmdLineage
+import nextflow.config.ConfigMap
+import nextflow.dag.MermaidHtmlRenderer
+import nextflow.lineage.LinHistoryRecord
+import nextflow.lineage.LinStore
+import nextflow.lineage.LinStoreFactory
+import nextflow.lineage.LinUtils
+import nextflow.lineage.model.FileOutput
+import nextflow.lineage.model.Parameter
+import nextflow.lineage.model.TaskRun
+import nextflow.lineage.model.WorkflowRun
+import nextflow.lineage.serde.LinEncoder
+import nextflow.ui.TableBuilder
+import org.eclipse.jgit.diff.DiffAlgorithm
+import org.eclipse.jgit.diff.DiffFormatter
+import org.eclipse.jgit.diff.RawText
+import org.eclipse.jgit.diff.RawTextComparator
+/**
+ * Implements lineage command line operations
+ *
+ * @author Paolo Di Tommaso
+ */
+@CompileStatic
+class LinCommandImpl implements CmdLineage.LinCommand {
+
+    private static final Path DEFAULT_HTML_FILE = Path.of("lineage.html")
+
+    /**
+     * Edge of the rendered lineage graph, connecting a source node to a destination node.
+     */
+    @Canonical
+    static class Edge {
+        String source
+        String destination
+        String label
+    }
+
+    static final private String ERR_NOT_LOADED = 'Error lineage store not loaded - Check Nextflow configuration'
+
+    /**
+     * Prints the lineage history log, or an error message when the lineage store is not available.
+     */
+    @Override
+    void log(ConfigMap config) {
+        final session = new Session(config)
+        final store = LinStoreFactory.getOrCreate(session)
+        if (store) {
+            printHistory(store)
+        } else {
+            println ERR_NOT_LOADED
+        }
+    }
+
+    /**
+     * Prints the records of the store history log as a tab separated table.
+     */
+    private void printHistory(LinStore store) {
+        final records = store.historyLog?.records
+        if( !records ) {
+            println("No workflow runs found in lineage history log")
+            return
+        }
+        def table = new TableBuilder(cellSeparator: '\t')
+            .head('TIMESTAMP')
+            .head('RUN NAME')
+            .head('SESSION ID')
+            .head('LINEAGE ID')
+        for (LinHistoryRecord record : records) {
+            table.append(record.toList())
+        }
+        println table.toString()
+    }
+
+    /**
+     * Prints the JSON description of the lineage entries matching a lineage URL.
+     *
+     * @param config Nextflow configuration
+     * @param args Command arguments; the first one is the lineage URL to describe
+     * @throws Exception when the first argument is not a lineage URL
+     */
+    @Override
+    void describe(ConfigMap config, List args) {
+        if( !isLidUri(args[0]) )
+            throw new Exception("Identifier is not a lineage URL")
+        final store = LinStoreFactory.getOrCreate(new Session(config))
+        if ( !store ) {
+            println ERR_NOT_LOADED
+            return
+        }
+        try {
+            def entries = LinUtils.query(store, new URI(args[0]))
+            if( !entries ) {
+                println "No entries found for ${args[0]}"
+                return
+            }
+            // a single result is printed as an object instead of a one-element collection
+            entries = entries.size() == 1 ? entries[0] : entries
+            println LinUtils.encodeSearchOutputs(entries, true)
+        } catch (Throwable e) {
+            println "Error loading ${args[0]} - ${e.message}"
+        }
+    }
+
+    /**
+     * Renders the lineage graph of a lineage entry as an HTML file.
+     *
+     * @param config Nextflow configuration
+     * @param args Command arguments; the first one is the lineage URL to render, the optional
+     *      second one is the target file (default: lineage.html)
+     */
+    @Override
+    void render(ConfigMap config, List args) {
+        final store = LinStoreFactory.getOrCreate(new Session(config))
+        if( !store ) {
+            println ERR_NOT_LOADED
+            return
+        }
+        try {
+            final renderFile = args.size() > 1 ? Path.of(args[1]) : DEFAULT_HTML_FILE
+            renderLineage(store, args[0], renderFile)
+            println("Lineage graph for ${args[0]} rendered in $renderFile")
+        } catch (Throwable e) {
+            println("ERROR: rendering lineage graph - ${e.message}")
+        }
+    }
+
+    /**
+     * Builds the Mermaid flowchart of the lineage of an entry, walking back through its
+     * sources, and writes it into the HTML template.
+     */
+    private void renderLineage(LinStore store, String dataLid, Path file) {
+        def lines = [] as List
+        lines << "flowchart BT".toString()
+        final nodesToRender = new LinkedList()
+        nodesToRender.add(dataLid)
+        final edgesToRender = new LinkedList()
+        // nodes already processed: an entry reachable through several paths is rendered only once
+        final visited = new HashSet()
+        while (!nodesToRender.isEmpty()) {
+            final node = nodesToRender.removeFirst()
+            if( !visited.add(node) )
+                continue
+            processNode(lines, node, nodesToRender, edgesToRender, store)
+        }
+        lines << ""
+        edgesToRender.each { lines << " ${it.source} -->${it.destination}".toString() }
+        lines << ""
+        final template = MermaidHtmlRenderer.readTemplate()
+        file.text = template.replace('REPLACE_WITH_NETWORK_DATA', lines.join('\n'))
+    }
+
+    /**
+     * Replaces with '_' any character other than letters, digits and '_', '.', ':', '/', '-'.
+     */
+    private String safeId( String rawId){
+        return rawId.replaceAll(/[^a-zA-Z0-9_.:\/\-]/, '_')
+    }
+
+    /**
+     * Loads the lineage entry of a node and adds its graph definition, queueing the
+     * lineage entries it depends on.
+     *
+     * @throws Exception when the node is not a lineage URL or its entry type is not supported
+     * @throws FileNotFoundException when the entry does not exist in the store
+     */
+    private void processNode(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, LinStore store) {
+        if (!isLidUri(nodeToRender))
+            throw new Exception("Identifier is not a lineage URL")
+        final key = nodeToRender.substring(LID_PROT.size())
+        final lidObject = store.load(key)
+        // the store returns null for an unknown key: report it instead of failing on `getClass()`
+        if( lidObject == null )
+            throw new FileNotFoundException("Lineage object ${nodeToRender} not found")
+        switch (lidObject.getClass()) {
+            case FileOutput:
+                processDataOutput(lidObject as FileOutput, lines, nodeToRender, nodes, edges)
+                break;
+
+            case WorkflowRun:
+                processWorkflowRun(lidObject as WorkflowRun, lines, nodeToRender, edges)
+                break
+
+            case TaskRun:
+                processTaskRun(lidObject as TaskRun, lines, nodeToRender, nodes, edges)
+                break
+
+            default:
+                throw new Exception("Unrecognized type reference ${lidObject.getClass().getSimpleName()}")
+        }
+    }
+
+    /**
+     * Adds the node of a task run and one source node for each of its inputs.
+     */
+    private void processTaskRun(TaskRun taskRun, List lines, String nodeToRender, LinkedList nodes, LinkedList edges) {
+        lines << " ${nodeToRender}@{shape: process, label: \"${taskRun.name} [$nodeToRender]\"}".toString()
+        final parameters = taskRun.input
+        for (Parameter source : parameters) {
+            if (source.type.equals("path")) {
+                manageFileInParam(lines, nodeToRender, nodes, edges, source.value)
+            } else {
+                final label = convertToLabel(source.value.toString())
+                final id = safeId(source.value.toString())
+                lines << " ${id}@{shape: document, label: \"${label}\"}".toString();
+                edges.add(new Edge(id, nodeToRender))
+            }
+        }
+    }
+
+    /**
+     * Adds the node of a workflow run and one source node for each of its parameters.
+     */
+    private void processWorkflowRun(WorkflowRun wfRun, List lines, String nodeToRender, LinkedList edges) {
+        lines << """ ${nodeToRender}@{shape: processes, label: \"${wfRun.name} [${nodeToRender}]\"}""".toString()
+        final parameters = wfRun.params
+        parameters.each {
+            final label = convertToLabel(it.value.toString())
+            final id = safeId(it.value.toString())
+            lines << " ${id}@{shape: document, label: \"${label}\"}".toString();
+            edges.add(new Edge(id, nodeToRender))
+        }
+    }
+
+    /**
+     * Adds the node of a file output and, when it has a source, the edge from it. A source that
+     * is a lineage URL is queued for rendering, any other source is added as a document node.
+     */
+    private void processDataOutput(FileOutput lidObject, List lines, String nodeToRender, LinkedList nodes, LinkedList edges){
+        lines << " ${nodeToRender}@{shape: document, label: \"${nodeToRender}\"}".toString();
+        final source = lidObject.source
+        if(! source )
+            return
+        if (isLidUri(source)) {
+            nodes.add(source)
+            edges.add(new Edge(source, nodeToRender))
+        } else {
+            final label = convertToLabel(source)
+            final id = safeId(source)
+            lines << " ${id}@{shape: document, label: \"${label}\"}".toString();
+            edges.add(new Edge(id, nodeToRender))
+        }
+    }
+
+    /**
+     * Inserts a left-to-right mark (U+200E) after the 'h' of each 'http' occurrence.
+     */
+    // NOTE(review): presumably prevents the renderer from turning URLs into links -- confirm
+    private String convertToLabel(String label){
+        return label.replace('http', 'h\u200Ettp')
+    }
+
+    /**
+     * Adds the source nodes of a file input value, which can be a collection of values, a
+     * lineage URL string, a map holding a `path` entry, or any other object rendered by its string form.
+     */
+    private void manageFileInParam(List lines, String nodeToRender, LinkedList nodes, LinkedList edges, value){
+        if (value instanceof Collection) {
+            value.each { manageFileInParam(lines, nodeToRender, nodes, edges, it) }
+            return
+        }
+        if (value instanceof CharSequence) {
+            final source = value.toString()
+            if (isLidUri(source)) {
+                nodes.add(source)
+                edges.add(new Edge(source, nodeToRender))
+                return
+            }
+        }
+        if (value instanceof Map ) {
+            if (value.path) {
+                final path = value.path.toString()
+                if (isLidUri(path)) {
+                    nodes.add(path)
+                    edges.add(new Edge(path, nodeToRender))
+                    return
+                } else {
+                    final label = convertToLabel(path)
+                    final id = safeId(path)
+                    lines << " ${id}@{shape: document, label: \"${label}\"}".toString();
+                    edges.add(new Edge(id, nodeToRender))
+                    return
+                }
+            }
+        }
+        final label = convertToLabel(value.toString())
+        final id = safeId(value.toString())
+        lines << " ${id}@{shape: document, label: \"${label}\"}".toString();
+        edges.add(new Edge(id, nodeToRender))
+    }
+
+    /**
+     * Prints a Git-style diff between the JSON descriptions of two lineage entries.
+     *
+     * @param config Nextflow configuration
+     * @param args Command arguments: the lineage URLs of the two entries to compare
+     * @throws Exception when one of the arguments is not a lineage URL
+     */
+    @Override
+    void diff(ConfigMap config, List args) {
+        if (!isLidUri(args[0]) || !isLidUri(args[1]))
+            throw new Exception("Identifier is not a lineage URL")
+
+        final store = LinStoreFactory.getOrCreate(new Session(config))
+        if (!store) {
+            println ERR_NOT_LOADED
+            return
+        }
+        try {
+            final key1 = args[0].substring(LID_PROT.size())
+            final entry1 = store.load(key1)
+            if (!entry1) {
+                println "No entry found for ${args[0]}."
+                return
+            }
+            final key2 = args[1].substring(LID_PROT.size())
+            final entry2 = store.load(key2)
+            if (!entry2) {
+                println "No entry found for ${args[1]}."
+                return
+            }
+            final encoder = new LinEncoder().withPrettyPrint(true)
+            generateDiff(encoder.encode(entry1), key1, encoder.encode(entry2), key2)
+        } catch (Throwable e) {
+            println "Error generating diff between ${args[0]} and ${args[1]}: ${e.message}"
+        }
+    }
+
+    /**
+     * Computes the diff of two texts and prints it preceded by a Git-style header.
+     */
+    private static void generateDiff(String entry1, String key1, String entry2, String key2) {
+        // Convert strings to JGit RawText format
+        final text1 = new RawText(entry1.getBytes(StandardCharsets.UTF_8))
+        final text2 = new RawText(entry2.getBytes(StandardCharsets.UTF_8))
+
+        // Set up the diff algorithm (Git-style diff)
+        final diffAlgorithm = DiffAlgorithm.getAlgorithm(DiffAlgorithm.SupportedAlgorithm.MYERS)
+        final diffComparator = RawTextComparator.DEFAULT
+
+        // Compute the differences
+        final editList = diffAlgorithm.diff(diffComparator, text1, text2)
+
+        final output = new StringBuilder()
+        // Add header
+        output.append("diff --git ${key1} ${key2}\n")
+        output.append("--- ${key1}\n")
+        output.append("+++ ${key2}\n")
+
+        // Use DiffFormatter to display results in Git-style format
+        final outputStream = new ByteArrayOutputStream()
+        final diffFormatter = new DiffFormatter(outputStream)
+        diffFormatter.setOldPrefix(key1)
+        diffFormatter.setNewPrefix(key2)
+        diffFormatter.format(editList, text1, text2)
+        output.append(outputStream.toString(StandardCharsets.UTF_8))
+
+        println output.toString()
+    }
+
+    /**
+     * Prints, as a JSON list of lineage URLs, the keys of the store entries matching a query string.
+     *
+     * @param config Nextflow configuration
+     * @param args Command arguments; the first one is the query string
+     */
+    @Override
+    void find(ConfigMap config, List args) {
+        final store = LinStoreFactory.getOrCreate(new Session(config))
+        if (!store) {
+            println ERR_NOT_LOADED
+            return
+        }
+        try {
+            println LinUtils.encodeSearchOutputs(store.search(args[0]).keySet().collect {asUriString(it)}, true)
+        } catch (Throwable e){
+            println "Error searching for ${args[0]}. ${e.message}"
+        }
+    }
+}
diff --git a/modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy b/modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy
new file mode 100644
index 0000000000..798805e5d8
--- /dev/null
+++ b/modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage.config
+
+import groovy.transform.CompileStatic
+import nextflow.Global
+import nextflow.Session
+
+/**
+ * Model workflow data lineage config
+ *
+ * @author Paolo Di Tommaso
+ */
+@CompileStatic
+class LineageConfig {
+
+    final LineageStoreOpts store
+
+    final boolean enabled
+
+    /**
+     * @param opts Content of the `lineage` configuration scope; a missing `store` entry is
+     *      treated as an empty map and a missing `enabled` entry as {@code false}
+     */
+    LineageConfig(Map opts) {
+        this.store = new LineageStoreOpts(opts.store as Map ?: Map.of())
+        this.enabled = opts.enabled as boolean ?: false
+    }
+
+    /**
+     * @return The `lineage` configuration scope of the global session, or an empty map when
+     *      there is no session or the scope is not defined
+     */
+    static Map asMap() {
+        session?.config?.navigate('lineage') as Map ?: new HashMap()
+    }
+
+    /**
+     * Creates the lineage configuration from the `lineage` scope of the session configuration.
+     *
+     * @param session The current session
+     * @throws IllegalStateException when the session is missing
+     */
+    static LineageConfig create(Session session) {
+        if( session ) {
+            return new LineageConfig( session.config.navigate('lineage') as Map ?: Map.of())
+        }
+        else
+            throw new IllegalStateException("Missing Nextflow session")
+    }
+
+    /**
+     * Creates the lineage configuration from the global session.
+     *
+     * @throws IllegalStateException when there is no global session
+     */
+    static LineageConfig create() {
+        create(getSession())
+    }
+
+    static private Session getSession() {
+        return Global.session as Session
+    }
+}
diff --git a/modules/nf-lineage/src/main/nextflow/lineage/config/LineageStoreOpts.groovy
/*
 * Copyright 2013-2025, Seqera Labs
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package nextflow.lineage.config

import groovy.transform.CompileStatic

/**
 * Options of the lineage data store.
 *
 * @author Paolo Di Tommaso
 */
@CompileStatic
class LineageStoreOpts {

    /** Store location taken from the {@code location} option; null when the option is absent */
    final String location

    /**
     * @param opts Store options map; only the {@code location} entry is read
     */
    LineageStoreOpts(Map opts) {
        final configured = opts.get('location')
        this.location = configured as String
    }

}
package nextflow.lineage.fs

import java.nio.file.FileStore
import java.nio.file.FileSystem
import java.nio.file.Path
import java.nio.file.PathMatcher
import java.nio.file.WatchService
import java.nio.file.attribute.UserPrincipalLookupService
import java.nio.file.spi.FileSystemProvider

import com.google.common.collect.ImmutableSet
import groovy.transform.CompileStatic
import nextflow.lineage.LinStore
import nextflow.lineage.LinStoreFactory
import nextflow.lineage.config.LineageConfig

/**
 * File system for LID Paths
 *
 * @author Jorge Ejarque
 */
@CompileStatic
class LinFileSystem extends FileSystem {

    private LinFileSystemProvider provider

    private LinStore store

    /*
     * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208
     */
    protected LinFileSystem(){}

    /**
     * @param provider The provider that owns this file system
     * @param config Lineage configuration used to create the backing store
     */
    LinFileSystem(LinFileSystemProvider provider, LineageConfig config) {
        this.provider = provider
        this.store = LinStoreFactory.create(config)
    }

    LinStore getStore() {
        return store
    }

    /**
     * Two lineage file systems are equal when they have the same class, provider and store.
     * A {@code null} argument is never equal.
     */
    @Override
    boolean equals( Object other ) {
        if( this.is(other) )
            return true
        if( other == null || this.class != other.class )
            return false
        final that = (LinFileSystem)other
        return this.provider == that.provider && this.store == that.store
    }

    @Override
    int hashCode() {
        Objects.hash(provider,store)
    }

    @Override
    FileSystemProvider provider() {
        return provider
    }

    /** Closing is a no-op: this file system holds no resource of its own to release. */
    @Override
    void close() throws IOException {

    }

    /** @return always {@code true}, because {@link #close()} never closes the file system */
    @Override
    boolean isOpen() {
        return true
    }

    @Override
    boolean isReadOnly() {
        return true
    }

    @Override
    String getSeparator() {
        return LinPath.SEPARATOR
    }

    /** @return an empty iterable: no root directory is exposed */
    @Override
    Iterable<Path> getRootDirectories() {
        return Collections.<Path>emptyList()
    }

    /** @return an empty iterable: no file store is exposed */
    @Override
    Iterable<FileStore> getFileStores() {
        return Collections.<FileStore>emptyList()
    }

    @Override
    Set<String> supportedFileAttributeViews() {
        return ImmutableSet.of("basic")
    }

    /**
     * Build a lineage path by joining {@code first} and {@code more} with the path
     * separator and prefixing the result with the lineage protocol.
     */
    @Override
    Path getPath(String first, String... more) {
        final tail = more ? LinPath.SEPARATOR + more.join(LinPath.SEPARATOR) : ''
        return getPath(LinPath.asUri(LinPath.LID_PROT + first + tail))
    }

    Path getPath(URI uri){
        return new LinPath(this, uri)
    }

    @Override
    PathMatcher getPathMatcher(String syntaxAndPattern) {
        throw new UnsupportedOperationException();
    }

    @Override
    UserPrincipalLookupService getUserPrincipalLookupService() {
        throw new UnsupportedOperationException('User Principal Lookup Service not supported')
    }

    @Override
    WatchService newWatchService() throws IOException {
        throw new UnsupportedOperationException('Watch Service not supported')
    }
}
package nextflow.lineage.fs

import java.nio.ByteBuffer
import java.nio.channels.NonWritableChannelException
import java.nio.channels.SeekableByteChannel
import java.nio.file.AccessDeniedException
import java.nio.file.AccessMode
import java.nio.file.CopyOption
import java.nio.file.DirectoryStream
import java.nio.file.FileStore
import java.nio.file.FileSystem
import java.nio.file.FileSystemNotFoundException
import java.nio.file.LinkOption
import java.nio.file.OpenOption
import java.nio.file.Path
import java.nio.file.ProviderMismatchException
import java.nio.file.StandardOpenOption
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.attribute.FileAttribute
import java.nio.file.attribute.FileAttributeView
import java.nio.file.spi.FileSystemProvider

import groovy.transform.CompileStatic
import nextflow.lineage.config.LineageConfig

/**
 * File System Provider for LID Paths.
 *
 * The provider is read-only: every write, create, delete, copy and move operation
 * throws {@link UnsupportedOperationException}. Read operations are delegated to the
 * provider of the path a lineage path resolves to, or served by the
 * {@link LinMetadataPath} itself when the path resolves to a metadata description.
 *
 * @author Jorge Ejarque
 */
@CompileStatic
class LinFileSystemProvider extends FileSystemProvider {

    public static final String SCHEME = "lid"

    // single file system instance shared by every path handled by this provider
    private LinFileSystem fileSystem

    @Override
    String getScheme() {
        return SCHEME
    }

    /**
     * @throws ProviderMismatchException when the path is not a {@link LinPath}
     */
    protected LinPath toLinPath(Path path) {
        if (path !instanceof LinPath)
            throw new ProviderMismatchException()
        return (LinPath) path
    }

    /**
     * @throws IllegalArgumentException when the URI scheme is missing or is not {@link #SCHEME}
     */
    private void checkScheme(URI uri) {
        final scheme = uri.scheme?.toLowerCase()
        if (scheme != getScheme())
            throw new IllegalArgumentException("Not a valid ${getScheme().toUpperCase()} scheme: $scheme")
    }

    /**
     * Return the file system of this provider, creating it on first use.
     *
     * @param uri Any URI with the {@code lid} scheme
     * @param config Entries overriding the lineage configuration returned by
     *   {@link LineageConfig#asMap()}; only used when the file system is created
     */
    @Override
    synchronized FileSystem newFileSystem(URI uri, Map<String, ?> config) throws IOException {
        checkScheme(uri)
        if (fileSystem) {
            return fileSystem
        }
        // merge into a private copy so the overrides never leak into the map handed out by LineageConfig
        final Map<String,Object> merged = new HashMap<String,Object>(LineageConfig.asMap())
        if (config) {
            merged.putAll(config)
        }
        return fileSystem = new LinFileSystem(this, new LineageConfig(merged))
    }

    @Override
    FileSystem getFileSystem(URI uri) throws FileSystemNotFoundException {
        if (!fileSystem)
            throw new FileSystemNotFoundException()
        return fileSystem
    }

    synchronized FileSystem getFileSystemOrCreate(URI uri) {
        checkScheme(uri)
        if (!fileSystem) {
            fileSystem = (LinFileSystem) newFileSystem(uri, LineageConfig.asMap())
        }
        return fileSystem
    }

    @Override
    LinPath getPath(URI uri) {
        return (LinPath) ((LinFileSystem) getFileSystemOrCreate(uri)).getPath(uri)
    }

    @Override
    OutputStream newOutputStream(Path path, OpenOption... options) throws IOException {
        throw new UnsupportedOperationException("Write not supported by ${getScheme().toUpperCase()} file system provider")
    }

    @Override
    InputStream newInputStream(Path path, OpenOption... options) throws IOException {
        final lid = toLinPath(path)
        if (lid instanceof LinMetadataPath)
            return (lid as LinMetadataPath).newInputStream()
        return newInputStream0(lid, options)
    }

    private static InputStream newInputStream0(LinPath lid, OpenOption... options) throws IOException {
        final realPath = lid.getTargetOrMetadataPath()
        if (realPath instanceof LinMetadataPath)
            return (realPath as LinMetadataPath).newInputStream()
        return realPath.fileSystem.provider().newInputStream(realPath, options)
    }

    @Override
    SeekableByteChannel newByteChannel(Path path, Set<? extends OpenOption> options, FileAttribute<?>... attrs) throws IOException {
        final lid = toLinPath(path)
        validateOptions(options)
        return newByteChannel0(lid, options, attrs)
    }

    /**
     * Read-only view over a channel opened by the target provider:
     * reads and positioning are delegated, writes and truncation are rejected.
     */
    @CompileStatic
    private static class LinPathSeekableByteChannel implements SeekableByteChannel {
        SeekableByteChannel channel

        LinPathSeekableByteChannel(SeekableByteChannel channel) {
            this.channel = channel
        }

        @Override
        int read(ByteBuffer dst) throws IOException {
            return channel.read(dst)
        }

        @Override
        int write(ByteBuffer src) throws IOException {
            throw new NonWritableChannelException()
        }

        @Override
        long position() throws IOException {
            return channel.position()
        }

        @Override
        SeekableByteChannel position(long newPosition) throws IOException {
            channel.position(newPosition)
            return this
        }

        @Override
        long size() throws IOException {
            return channel.size()
        }

        @Override
        SeekableByteChannel truncate(long unused) throws IOException {
            throw new NonWritableChannelException()
        }

        @Override
        boolean isOpen() {
            return channel.isOpen()
        }

        @Override
        void close() throws IOException {
            channel.close()
        }
    }

    /**
     * @throws UnsupportedOperationException when the options include APPEND or WRITE
     */
    private static void validateOptions(Set<? extends OpenOption> options) {
        if (!options || options.empty)
            return
        for (OpenOption opt : options) {
            // All OpenOption values except for APPEND and WRITE are allowed
            if (opt == StandardOpenOption.APPEND || opt == StandardOpenOption.WRITE)
                throw new UnsupportedOperationException("'$opt' not allowed");
        }

    }

    private SeekableByteChannel newByteChannel0(LinPath lid, Set<? extends OpenOption> options, FileAttribute<?>... attrs) {
        if (lid instanceof LinMetadataPath) {
            return (lid as LinMetadataPath).newSeekableByteChannel()
        }
        final realPath = lid.getTargetOrMetadataPath()
        if (realPath instanceof LinMetadataPath) {
            return (realPath as LinMetadataPath).newSeekableByteChannel()
        } else {
            SeekableByteChannel channel = realPath.fileSystem.provider().newByteChannel(realPath, options, attrs)
            return new LinPathSeekableByteChannel(channel)
        }
    }

    /**
     * List the entries of the directory a lineage path resolves to. Entries are returned
     * as lineage paths relative to {@code path}, and only those accepted by {@code filter}
     * are included.
     */
    @Override
    DirectoryStream<Path> newDirectoryStream(Path path, DirectoryStream.Filter<? super Path> filter) throws IOException {
        final lid = toLinPath(path)
        final real = lid.getTargetPath()
        final stream = real
            .getFileSystem()
            .provider()
            .newDirectoryStream(real, new LidFilter(filter, real, lid))

        return new DirectoryStream<Path>() {

            @Override
            Iterator<Path> iterator() {
                return new LidIterator(fileSystem, stream.iterator(), lid, real)
            }

            @Override
            void close() throws IOException {
                stream.close()
            }
        }
    }

    /**
     * Adapts the caller's filter, which expects lineage paths, to the real entries
     * produced by the target provider. A null filter accepts every entry.
     */
    @CompileStatic
    private static class LidFilter implements DirectoryStream.Filter<Path> {

        private final DirectoryStream.Filter<? super Path> delegate
        private final Path realBase
        private final LinPath lidBase

        LidFilter(DirectoryStream.Filter<? super Path> delegate, Path realBase, LinPath lidBase) {
            this.delegate = delegate
            this.realBase = realBase
            this.lidBase = lidBase
        }

        @Override
        boolean accept(Path entry) throws IOException {
            if (delegate == null)
                return true
            // the caller sees lineage paths, so the decision is taken on the converted entry
            return delegate.accept(fromRealToLinPath(entry, realBase, lidBase))
        }
    }

    /**
     * Convert a path of the target file system into the matching lineage path.
     * An absolute path is first made relative to {@code realBase}; the result is
     * then resolved against {@code lidBase}.
     *
     * @throws ProviderMismatchException when an absolute path and {@code realBase} have different classes
     */
    private static LinPath fromRealToLinPath(Path toConvert, Path realBase, LinPath lidBase) {
        if (toConvert.isAbsolute()) {
            if (toConvert.class != realBase.class) {
                throw new ProviderMismatchException()
            }
            final relative = realBase.relativize(toConvert)
            return (LinPath) lidBase.resolve(relative.toString())
        } else {
            return (LinPath) lidBase.resolve(toConvert.toString())
        }
    }

    /** Iterator converting each real directory entry into a lineage path on the fly. */
    private static class LidIterator implements Iterator<Path> {

        private final LinFileSystem fs
        private final Iterator<Path> target
        private final LinPath parent
        private final Path parentReal

        LidIterator(LinFileSystem fs, Iterator<Path> itr, LinPath parent, Path real) {
            this.fs = fs
            this.target = itr
            this.parent = parent
            this.parentReal = real
        }

        @Override
        boolean hasNext() {
            return target.hasNext()
        }

        @Override
        LinPath next() {
            final path = target.next()
            return path ? fromRealToLinPath(path, parentReal, parent) : null
        }
    }

    @Override
    void createDirectory(Path dir, FileAttribute<?>... attrs) throws IOException {
        throw new UnsupportedOperationException("Create directory not supported by ${getScheme().toUpperCase()} file system provider")
    }

    @Override
    void delete(Path path) throws IOException {
        throw new UnsupportedOperationException("Delete not supported by ${getScheme().toUpperCase()} file system provider")
    }

    @Override
    void copy(Path source, Path target, CopyOption... options) throws IOException {
        throw new UnsupportedOperationException("Copy not supported by ${getScheme().toUpperCase()} file system provider")
    }

    @Override
    void move(Path source, Path target, CopyOption... options) throws IOException {
        throw new UnsupportedOperationException("Move not supported by ${getScheme().toUpperCase()} file system provider")
    }

    @Override
    boolean isSameFile(Path path, Path path2) throws IOException {
        return path == path2
    }

    @Override
    boolean isHidden(Path path) throws IOException {
        return toLinPath(path).getTargetOrMetadataPath().isHidden()
    }

    @Override
    FileStore getFileStore(Path path) throws IOException {
        throw new UnsupportedOperationException("File store not supported by ${getScheme().toUpperCase()} file system provider")
    }

    /**
     * Check access to a lineage path. WRITE and EXECUTE are always denied. Metadata
     * paths pass without further checks; any other path is checked by the provider
     * of the path it resolves to.
     */
    @Override
    void checkAccess(Path path, AccessMode... modes) throws IOException {
        validateAccessModes(modes)
        final lid = toLinPath(path)
        if (lid instanceof LinMetadataPath)
            return
        checkAccess0(lid, modes)
    }

    private void checkAccess0(LinPath lid, AccessMode... modes) {
        final real = lid.getTargetOrMetadataPath()
        if (real instanceof LinMetadataPath)
            return
        real.fileSystem.provider().checkAccess(real, modes)
    }

    private void validateAccessModes(AccessMode... modes) {
        for (AccessMode m : modes) {
            if (m == AccessMode.WRITE)
                throw new AccessDeniedException("Write mode not supported")
            if (m == AccessMode.EXECUTE)
                throw new AccessDeniedException("Execute mode not supported")
        }
    }

    @Override
    <V extends FileAttributeView> V getFileAttributeView(Path path, Class<V> type, LinkOption... options) {
        return null
    }

    @Override
    <A extends BasicFileAttributes> A readAttributes(Path path, Class<A> type, LinkOption... options) throws IOException {
        final lid = toLinPath(path)
        if (lid instanceof LinMetadataPath)
            return (lid as LinMetadataPath).readAttributes(type)
        return readAttributes0(lid, type, options)
    }

    private <A extends BasicFileAttributes> A readAttributes0(LinPath lid, Class<A> type, LinkOption... options) throws IOException {
        final real = lid.getTargetOrMetadataPath()
        if (real instanceof LinMetadataPath)
            return (real as LinMetadataPath).readAttributes(type)
        return real.fileSystem.provider().readAttributes(real, type, options)
    }

    @Override
    Map<String, Object> readAttributes(Path path, String attributes, LinkOption... options) throws IOException {
        throw new UnsupportedOperationException("Read file attributes not supported by ${getScheme().toUpperCase()} file system provider")
    }

    @Override
    void setAttribute(Path path, String attribute, Object value, LinkOption... options) throws IOException {
        throw new UnsupportedOperationException("Set file attributes not supported by ${getScheme().toUpperCase()} file system provider")
    }

}
package nextflow.lineage.fs

import groovy.transform.CompileStatic

import java.nio.channels.SeekableByteChannel
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.attribute.FileTime

/**
 * Class to model the metadata descriptions as a file.
 *
 * The description text is held in memory as UTF-8 bytes and exposed through
 * a stream, a seekable channel and a set of basic file attributes.
 *
 * @author Jorge Ejarque
 */
@CompileStatic
class LinMetadataPath extends LinPath {
    private byte[] results
    private FileTime creationTime

    /**
     * @param resultsObject Text of the metadata description
     * @param creationTime Time reported as creation, modification and access time
     * @param fs Lineage file system the path belongs to
     * @param path Lineage path of the described object
     * @param childs Names of the nested properties selected inside the object; when
     *   non-empty they are appended to the path as a {@code #a.b.c} fragment
     */
    LinMetadataPath(String resultsObject, FileTime creationTime, LinFileSystem fs, String path, String[] childs) {
        super(fs, "${path}${childs ? '#'+ childs.join('.') : ''}")
        this.results = resultsObject.getBytes("UTF-8")
        this.creationTime = creationTime
    }

    /** @return a new stream over the description bytes */
    InputStream newInputStream() {
        return new ByteArrayInputStream(results)
    }

    /** @return a new read-only channel over the description bytes */
    SeekableByteChannel newSeekableByteChannel(){
        return new LinMetadataSeekableByteChannel(results)
    }

    /**
     * Describe the metadata content as a regular file whose size is the number of
     * description bytes and whose timestamps all equal the creation time.
     *
     * @param type Requested attributes type; only {@link BasicFileAttributes} is supported
     * @throws UnsupportedOperationException when a more specific attributes type is requested
     */
    <A extends BasicFileAttributes> A readAttributes(Class<A> type){
        // only basic attributes exist here: fail clearly instead of handing back an object of the wrong type
        if( type != null && !type.isAssignableFrom(BasicFileAttributes) )
            throw new UnsupportedOperationException("File attributes of type ${type.name} are not supported by lineage metadata paths")
        return (A) new BasicFileAttributes() {
            @Override
            long size() { return results.length }

            @Override
            FileTime lastModifiedTime() { return creationTime }

            @Override
            FileTime lastAccessTime() { return creationTime }

            @Override
            FileTime creationTime() { return creationTime }

            @Override
            boolean isRegularFile() { return true }

            @Override
            boolean isDirectory() { return false }

            @Override
            boolean isSymbolicLink() { return false }

            @Override
            boolean isOther() { return false }

            @Override
            Object fileKey() { return null }
        }
    }
}
/*
 * Copyright 2013-2025, Seqera Labs
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package nextflow.lineage.fs

import groovy.transform.CompileStatic

import java.nio.ByteBuffer
import java.nio.channels.ClosedChannelException
import java.nio.channels.NonWritableChannelException
import java.nio.channels.SeekableByteChannel

/**
 * SeekableByteChannel for metadata results description as a file.
 *
 * Read-only channel over an in-memory byte array. The position may be moved past
 * the end of the content, in which case reads report end-of-stream.
 *
 * @author Jorge Ejarque
 */
@CompileStatic
class LinMetadataSeekableByteChannel implements SeekableByteChannel {
    private final byte[] content
    private long cursor
    private boolean open

    /**
     * @param bytes Content served by the channel; the array is used as is, not copied
     */
    LinMetadataSeekableByteChannel(byte[] bytes){
        this.content = bytes
        this.cursor = 0
        this.open = true
    }

    /**
     * Copy as many bytes as fit in {@code dst}, starting at the current position.
     *
     * @return the number of bytes copied, or -1 when the position is at or past the end
     * @throws ClosedChannelException when the channel is closed
     */
    @Override
    int read(ByteBuffer dst) {
        ensureOpen()
        if (cursor >= content.length) return -1
        final int count = (int) Math.min((long) dst.remaining(), content.length - cursor)
        // bulk copy straight from the backing array, no intermediate buffer needed
        dst.put(content, (int) cursor, count)
        cursor += count
        return count
    }

    @Override
    int write(ByteBuffer src) { throw new NonWritableChannelException() }

    @Override
    long position() {
        ensureOpen()
        return cursor
    }

    /**
     * Move the read position. A position greater than the size is allowed and makes
     * the following reads return -1.
     *
     * @throws IllegalArgumentException when the position is negative
     * @throws ClosedChannelException when the channel is closed
     */
    @Override
    SeekableByteChannel position(long newPosition) {
        ensureOpen()
        if (newPosition < 0) throw new IllegalArgumentException()
        cursor = newPosition
        return this
    }

    @Override
    long size() {
        ensureOpen()
        return content.length
    }

    @Override
    SeekableByteChannel truncate(long size) { throw new NonWritableChannelException() }

    @Override
    boolean isOpen() { return open }

    @Override
    void close() { open = false }

    private void ensureOpen() {
        if (!open) throw new ClosedChannelException()
    }
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.lineage.fs + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.file.FileHelper +import nextflow.file.LogicalDataPath +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.FileOutput +import nextflow.lineage.serde.LinSerializable +import nextflow.util.CacheHelper +import nextflow.util.TestOnly + +import static LinFileSystemProvider.* +import static nextflow.lineage.LinUtils.* + +import java.nio.file.FileSystem +import java.nio.file.LinkOption +import java.nio.file.Path +import java.nio.file.ProviderMismatchException +import java.nio.file.WatchEvent +import java.nio.file.WatchKey +import java.nio.file.WatchService +import java.time.OffsetDateTime + +/** + * LID file system path + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +class LinPath implements Path, LogicalDataPath { + + static public final List SUPPORTED_CHECKSUM_ALGORITHMS=["nextflow"] + static public final String SEPARATOR = '/' + public static final String LID_PROT = "${SCHEME}://" + + static private final String[] EMPTY = new String[] {} + + private LinFileSystem fileSystem + + // String with the lineage file path + private String filePath + + private String query + + private String fragment + + /* + * Only needed to prevent serialization issues - see https://github.com/nextflow-io/nextflow/issues/5208 + */ + protected LinPath(){} + + LinPath(LinFileSystem fs, URI uri) { + if( uri.scheme != SCHEME ) { + throw new IllegalArgumentException("Invalid LID URI - scheme is different for $SCHEME") + } + this.fileSystem = fs + this.query = uri.query + this.fragment = uri.fragment + this.filePath = resolve0( fs, norm0("${uri.authority?:''}${uri.path}") ) + } + + protected LinPath(String query, String fragment, String filepath, LinFileSystem fs){ + this.fileSystem = fs + this.query = query + this.fragment = fragment + 
this.filePath = filepath + } + + LinPath(LinFileSystem fs, String path) { + this( fs, asUri( LID_PROT + norm0(path)) ) + } + + LinPath(LinFileSystem fs, String first, String[] more) { + this( fs, asUri( LID_PROT + buildPath(first, more) ) ) + } + + static String asUriString(String first, String... more) { + return LID_PROT + buildPath(first, more) + } + + static boolean isLidUri(String path) { + return path && path.startsWith(LID_PROT) + } + + private static String buildPath(String first, String[] more){ + first = norm0(first) + if (more){ + final morePath = norm0(more).join(SEPARATOR) + return first.isEmpty() ? morePath : first + SEPARATOR + morePath + } + return first + } + + protected static void validateDataOutput(FileOutput lidObject) { + final hashedPath = FileHelper.toCanonicalPath(lidObject.path as String) + if( !hashedPath.exists() ) + throw new FileNotFoundException("Target path $lidObject.path does not exist") + validateChecksum(lidObject.checksum, hashedPath) + } + + protected static void validateChecksum(Checksum checksum, Path hashedPath) { + if( !checksum) + return + if( ! isAlgorithmSupported(checksum.algorithm) ) { + log.warn("Checksum of '$hashedPath' can't be validated. Algorithm '${checksum.algorithm}' is not supported") + return + } + final hash = checksum.mode + ? CacheHelper.hasher(hashedPath, CacheHelper.HashMode.of(checksum.mode.toString().toLowerCase())).hash().toString() + : CacheHelper.hasher(hashedPath).hash().toString() + if (hash != checksum.value) + log.warn("Checksum of '$hashedPath' does not match with the one stored in the metadata") + } + + protected static isAlgorithmSupported( String algorithm ){ + return algorithm && algorithm in SUPPORTED_CHECKSUM_ALGORITHMS + } + + @TestOnly + protected String getFilePath(){ this.filePath } + + /** + * Finds the target path of a LinPath. 
+ * + * @param fs LinFileSystem associated to the LinPath to find + * @param filePath Path associated to the LinPath to find + * @param resultsAsPath True to return metadata descriptions as LinMetadataPath + * @param children Sub-object/path inside the description + * @return Path or LinMetadataPath associated to the LinPath + * @throws Exception + * IllegalArgumentException if the filepath, filesystem or its LinStore are null. + * FileNotFoundException if the filePath or children are not found in the LinStore. + */ + protected static Path findTarget(LinFileSystem fs, String filePath, boolean resultsAsPath, String[] children=[]) throws Exception { + if( !fs ) + throw new IllegalArgumentException("Cannot get target path for a relative lineage path") + if( filePath.isEmpty() || filePath == SEPARATOR ) + throw new IllegalArgumentException("Cannot get target path for an empty lineage path") + final store = fs.getStore() + if( !store ) + throw new Exception("Lineage store not found - Check Nextflow configuration") + final object = store.load(filePath) + if ( object ){ + if( object instanceof FileOutput ) { + return getTargetPathFromOutput(object, children) + } + if( resultsAsPath ){ + return getMetadataAsTargetPath(object, fs, filePath, children) + } + } else { + // If there isn't metadata check the parent to check if it is a subfolder of a task/workflow output + final currentPath = Path.of(filePath) + final parent = Path.of(filePath).getParent() + if( parent ) { + ArrayList newChildren = new ArrayList() + newChildren.add(currentPath.getFileName().toString()) + newChildren.addAll(children) + //resultsAsPath set to false because parent paths are only inspected for DataOutputs + return findTarget(fs, parent.toString(), false, newChildren as String[]) + } + } + throw new FileNotFoundException("Target path '$filePath' does not exist") + } + + protected static Path getMetadataAsTargetPath(LinSerializable results, LinFileSystem fs, String filePath, String[] children){ + if( 
!results ) { + throw new FileNotFoundException("Target path '$filePath' does not exist") + } + if (children && children.size() > 0) { + return getSubObjectAsPath(fs, filePath, results, children) + } else { + return generateLinMetadataPath(fs, filePath, results, children) + } + } + + /** + * Get a metadata sub-object as LinMetadataPath. + * If the requested sub-object is the workflow or task outputs, retrieves the outputs from the outputs description. + * + * @param fs LinFilesystem for the te. + * @param key Parent metadata key. + * @param object Parent object. + * @param children Array of string in indicating the properties to navigate to get the sub-object. + * @return LinMetadataPath or null in it does not exist + */ + static LinMetadataPath getSubObjectAsPath(LinFileSystem fs, String key, LinSerializable object, String[] children) { + if( isSearchingOutputs(object, children) ) { + // When asking for a Workflow or task output retrieve the outputs description + final outputs = fs.store.load("${key}/output") + if( !outputs ) { + throw new FileNotFoundException("Target path '$key#output' does not exist") + } + return generateLinMetadataPath(fs, key, outputs, children) + } + else { + return generateLinMetadataPath(fs, key, object, children) + } + } + + private static LinMetadataPath generateLinMetadataPath(LinFileSystem fs, String key, Object object, String[] children){ + def creationTime = toFileTime(navigate(object, 'createdAt') as OffsetDateTime ?: OffsetDateTime.now()) + final output = children ? 
navigate(object, children.join('.')) : object + if( !output ) { + throw new FileNotFoundException("Target path '$key#${children.join('.')}' does not exist") + } + return new LinMetadataPath(encodeSearchOutputs(output, true), creationTime, fs, key, children) + } + + private static Path getTargetPathFromOutput(FileOutput object, String[] children) { + final lidObject = object as FileOutput + // return the real path stored in the metadata + validateDataOutput(lidObject) + def realPath = FileHelper.toCanonicalPath(lidObject.path as String) + if (children && children.size() > 0) + realPath = realPath.resolve(children.join(SEPARATOR)) + if (!realPath.exists()) + throw new FileNotFoundException("Target path '$realPath' does not exist") + return realPath + } + + private static boolean isEmptyBase(LinFileSystem fs, String base){ + return !base || base == SEPARATOR || (fs && base == "..") + } + + private static String resolve0(LinFileSystem fs, String base, String[] more) { + if( isEmptyBase(fs,base) ) { + return resolveEmptyPathCase(fs, more as List) + } + if( base.contains(SEPARATOR) ) { + final parts = base.tokenize(SEPARATOR) + final remain = parts[1..-1] + more.toList() + return resolve0(fs, parts[0], remain as String[]) + } + final result = Path.of(base) + return more ? 
result.resolve(more.join(SEPARATOR)).toString() : result.toString() + } + + private static String resolveEmptyPathCase(LinFileSystem fs, List more ){ + switch(more.size()) { + case 0: + return "/" + case 1: + return resolve0(fs, more[0], EMPTY) + default: + return resolve0(fs, more[0], more[1..-1] as String[]) + } + } + + static private String norm0(String path) { + if( !path || path==SEPARATOR) + return "" + //Remove repeated elements + path = Path.of(path.trim()).normalize().toString() + //Remove initial and final separators + if( path.startsWith(SEPARATOR) ) + path = path.substring(1) + if( path.endsWith(SEPARATOR) ) + path = path.substring(0,path.size()-1) + return path + } + + static private String[] norm0(String... path) { + for( int i=0; i1 ) + return subpath(0,c-1) + if( c==1 ) + return new LinPath(fileSystem,SEPARATOR) + return null + } + + @Override + int getNameCount() { + return Path.of(filePath).nameCount + } + + @Override + Path getName(int index) { + if( index<0 ) + throw new IllegalArgumentException("Path name index cannot be less than zero - offending value: $index") + final path = Path.of(filePath) + if (index == path.nameCount - 1){ // the last name element keeps the query and fragment of this path + return new LinPath(query, fragment, path.getName(index).toString(), null) // argument order (query, fragment, ...) as used by resolve() and relativize() + } + return new LinPath(index==0 ? fileSystem : null, path.getName(index).toString()) + } + + @Override + Path subpath(int beginIndex, int endIndex) { + if( beginIndex<0 ) + throw new IllegalArgumentException("subpath begin index cannot be less than zero - offending value: $beginIndex") + final path = Path.of(filePath) + return new LinPath(beginIndex==0 ? 
fileSystem : null, path.subpath(beginIndex, endIndex).toString()) + } + + @Override + Path normalize() { + return new LinPath(fileSystem, Path.of(filePath).normalize().toString()) + } + + @Override + boolean startsWith(Path other) { + return startsWith(other.toString()) + } + + @Override + boolean startsWith(String other) { + return filePath.startsWith(other) + } + + @Override + boolean endsWith(Path other) { + return endsWith(other.toString()) + } + + @Override + boolean endsWith(String other) { + return filePath.endsWith(other) + } + + @Override + Path resolve(Path other) { + if( LinPath.class != other.class ) + throw new ProviderMismatchException() + + final that = (LinPath)other + + if( that.fileSystem && this.fileSystem != that.fileSystem ) + return other + if( that.isAbsolute() ) { + return that + } else { + final newPath = Path.of(filePath).resolve(that.toString()) + return new LinPath(that.query, that.fragment, newPath.toString(), fileSystem) + } + } + + @Override + Path resolve(String path) { + if( !path ) + return this + final scheme = FileHelper.getUrlProtocol(path) + if( !scheme ) { + // consider the path as a lid relative path + return resolve(new LinPath(null,path)) + } + if( scheme != SCHEME ) { + throw new ProviderMismatchException() + } + final that = fileSystem.provider().getPath(asUri(path)) + return resolve(that) + } + + @Override + Path relativize(Path other) { + if( LinPath.class != other.class ) { + throw new ProviderMismatchException() + } + LinPath lidOther = other as LinPath + if( this.isAbsolute() != lidOther.isAbsolute() ) + throw new IllegalArgumentException("Cannot compare absolute with relative paths"); + def path + if( this.isAbsolute() ) { + // Compare 'filePath' as absolute paths adding the root separator + path = Path.of(SEPARATOR + filePath).relativize(Path.of(SEPARATOR + lidOther.filePath)) + } else { + // Compare 'filePath' as relative paths + path = Path.of(filePath).relativize(Path.of(lidOther.filePath)) + } + return new 
LinPath(lidOther.query, lidOther.fragment, path.getNameCount()>0 ? path.toString() : SEPARATOR, null) + } + + @Override + URI toUri() { + return asUri("${SCHEME}://${filePath}${query ? '?' + query: ''}${fragment ? '#'+ fragment : ''}") + } + + String toUriString() { + return toUri().toString() + } + + @Override + Path toAbsolutePath() { + return this + } + + @Override + Path toRealPath(LinkOption... options) throws IOException { + return this.getTargetOrMetadataPath() + } + + Path toTargetPath() { + return getTargetOrMetadataPath() + } + + /** + * Get the path associated to a DataOutput metadata. + * + * @return Path associated to a DataOutput + * @throws FileNotFoundException if the metadata associated to the LinPath does not exist or its type is not a DataOutput. + */ + protected Path getTargetPath() { + return findTarget(fileSystem, filePath, false, parseChildrenFromFragment(fragment)) + } + + /** + * Get the path associated to any metadata object. + * + * @return Path associated to a DataOutput or LinMetadataFile with the metadata object for other types. + * @throws FileNotFoundException if the metadata associated to the LinPath does not exist + */ + protected Path getTargetOrMetadataPath(){ + return findTarget(fileSystem, filePath, true, parseChildrenFromFragment(fragment)) + } + + @Override + File toFile() throws IOException { + throw new UnsupportedOperationException("toFile not supported by LinPath") + } + + @Override + WatchKey register(WatchService watcher, WatchEvent.Kind[] events, WatchEvent.Modifier... 
modifiers) throws IOException { + throw new UnsupportedOperationException("Register not supported by LinPath") + } + + @Override + int compareTo(Path other) { + return toString().compareTo(other.toString()); + } + + @Override + boolean equals(Object other) { + if( LinPath.class != other?.class ) { // null-safe: equals(null) must return false rather than throw + return false + } + final that = (LinPath)other + return this.fileSystem == that.fileSystem && this.filePath.equals(that.filePath) + } + + /** + * @return Hash code computed from the file system and the file path, the same fields compared by equals + */ + @Override + int hashCode() { + return Objects.hash(fileSystem,filePath) + } + + static URI asUri(String path) { + if (!path) + throw new IllegalArgumentException("Missing 'path' argument") + if (!path.startsWith(LID_PROT)) + throw new IllegalArgumentException("Invalid LID file system path URI - it must start with '${LID_PROT}' prefix - offending value: $path") + if (path.startsWith(LID_PROT + SEPARATOR) && path.length() > 7) + throw new IllegalArgumentException("Invalid LID file system path URI - make sure the schema prefix does not contain more than two slash characters - offending value: $path") + if (path == LID_PROT) //Empty path case + return new URI("lid:///") + return new URI(path) + } + + @Override + String toString() { + return "$filePath${query ? '?' + query: ''}${fragment ? '#'+ fragment : ''}".toString() + } + +} + diff --git a/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPathFactory.groovy b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPathFactory.groovy new file mode 100644 index 0000000000..1c2c7350e1 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/fs/LinPathFactory.groovy @@ -0,0 +1,59 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.lineage.fs + +import static LinPath.* + +import java.nio.file.Path + +import groovy.transform.CompileStatic +import nextflow.lineage.config.LineageConfig +import nextflow.file.FileHelper +import nextflow.file.FileSystemPathFactory +/** + * Implements a {@link FileSystemPathFactory} for LID file system + * + * @author Jorge Ejarque + */ +@CompileStatic +class LinPathFactory extends FileSystemPathFactory { + + @Override + protected Path parseUri(String uri) { + return isLidUri(uri) ? create(uri) : null + } + + @Override + protected String toUriString(Path path) { + return path instanceof LinPath ? ((LinPath)path).toUriString() : null + } + + @Override + protected String getBashLib(Path target) { + return null + } + + @Override + protected String getUploadCmd(String source, Path target) { + return null + } + + static LinPath create(String path) { + final uri = LinPath.asUri(path) + return (LinPath) FileHelper.getOrCreateFileSystemFor(uri, LineageConfig.asMap()).provider().getPath(uri) + } +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/Annotation.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/Annotation.groovy new file mode 100644 index 0000000000..064bc4e64e --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/Annotation.groovy @@ -0,0 +1,33 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.lineage.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * Models an Annotation. + * + * @author Jorge Ejarque + */ +@Canonical +@CompileStatic +class FileOutput implements LinSerializable { + /** + * Real path of the output data. + */ + String path + /** + * Checksum of the output data. + */ + Checksum checksum + /** + * Entity that generated the data. Possible entities are: + * - a DataOutput if the workflow published from a task data. + * - a TaskRun if the data is a task output. + * - a WorkflowRun if the data is generated by the workflow (e.g., an index file). + */ + String source + /** + * Reference to the WorkflowRun that generated the data. + */ + String workflowRun + /** + * Reference to the task that generated the data. + */ + String taskRun + /** + * Size of the data. + */ + long size + /** + * Data creation date. + */ + OffsetDateTime createdAt + /** + * Data last modified date. + */ + OffsetDateTime modifiedAt + /** + * Annotations attached to the data + */ + List annotations +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/Parameter.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/Parameter.groovy new file mode 100644 index 0000000000..3e3c00343f --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/Parameter.groovy @@ -0,0 +1,33 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.lineage.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic + +/** + * Model Workflow and Task Parameters. + * + * @author Jorge Ejarque + */ +@Canonical +@CompileStatic +class TaskOutput implements LinSerializable { + /** + * Reference to the task that generated the data. + */ + String taskRun + /** + * Reference to the WorkflowRun that generated the data. + */ + String workflowRun + /** + * Creation date of this task outputs description + */ + OffsetDateTime createdAt + /** + * Outputs of the task + */ + List output + /** + * Annotations attached to the task outputs + */ + List annotations +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/TaskRun.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/TaskRun.groovy new file mode 100644 index 0000000000..ebcc7c4b36 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/TaskRun.groovy @@ -0,0 +1,83 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.lineage.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic +import nextflow.lineage.serde.LinSerializable + +/** + * Models a task execution. + * + * @author Paolo Di Tommaso + */ +@Canonical +@CompileStatic +class TaskRun implements LinSerializable { + /** + * Execution session identifier + */ + String sessionId + /** + * Task name + */ + String name + /** + * Checksum of the task source code + */ + Checksum codeChecksum + /** + * Checksum of the task script + */ + String script + /** + * Task run input + */ + List input + /** + * Container used for the task run + */ + String container + /** + * Conda environment used for the task run + */ + String conda + /** + * Spack environment used for the task run + */ + String spack + /** + * Architecture defined in the Spack environment used for the task run + */ + String architecture + /** + * Global variables defined in the task run + */ + Map globalVars + /** + * Binaries used in the task run + */ + List binEntries + /** + * Workflow run associated to the task run + */ + String workflowRun + /** + * Annotations attached to the task run + */ + List annotations +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/Workflow.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/Workflow.groovy new file mode 100644 index 0000000000..b47781e47a --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/Workflow.groovy @@ -0,0 +1,44 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.lineage.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic +import nextflow.lineage.serde.LinSerializable + + +/** + * Models a workflow definition. + * + * @author Jorge Ejarque scriptFiles + /** + * Workflow repository + */ + String repository + /** + * Workflow commit identifier + */ + String commitId +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutput.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutput.groovy new file mode 100644 index 0000000000..134e2cebed --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowOutput.groovy @@ -0,0 +1,49 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.lineage.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic +import nextflow.lineage.serde.LinSerializable + +import java.time.OffsetDateTime + +/** + * Models the results of a workflow execution. 
+ * + * @author Jorge Ejarque output + /** + * Annotations attached to the workflow outputs + */ + List annotations +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowRun.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowRun.groovy new file mode 100644 index 0000000000..2676432ed8 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/WorkflowRun.groovy @@ -0,0 +1,55 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.lineage.model + +import groovy.transform.Canonical +import groovy.transform.CompileStatic +import nextflow.lineage.serde.LinSerializable + +/** + * Models a Workflow Execution + * + * @author Jorge Ejarque params + /** + * Resolved Configuration + */ + Map config + /** + * Annotations attached to the workflow run + */ + List annotations +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy b/modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy new file mode 100644 index 0000000000..334696e858 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder.groovy @@ -0,0 +1,53 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.lineage.serde + +import groovy.transform.CompileStatic +import nextflow.lineage.model.FileOutput +import nextflow.lineage.model.TaskOutput +import nextflow.lineage.model.TaskRun +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowOutput +import nextflow.lineage.model.WorkflowRun +import nextflow.serde.gson.GsonEncoder +import nextflow.serde.gson.RuntimeTypeAdapterFactory + +/** + * Implements a JSON encoder for lineage model objects + * + * @author Paolo Di Tommaso + */ +@CompileStatic +class LinEncoder extends GsonEncoder { + + LinEncoder() { + withTypeAdapterFactory(newLidTypeAdapterFactory()) + // enable rendering of null values + withSerializeNulls(true) + } + + static RuntimeTypeAdapterFactory newLidTypeAdapterFactory(){ + RuntimeTypeAdapterFactory.of(LinSerializable.class, "type") + .registerSubtype(WorkflowRun, WorkflowRun.simpleName) + .registerSubtype(WorkflowOutput, WorkflowOutput.simpleName) + .registerSubtype(Workflow, Workflow.simpleName) + .registerSubtype(TaskRun, TaskRun.simpleName) + .registerSubtype(TaskOutput, TaskOutput.simpleName) + .registerSubtype(FileOutput, FileOutput.simpleName) + } + +} diff --git a/modules/nf-lineage/src/main/nextflow/lineage/serde/LinSerializable.groovy b/modules/nf-lineage/src/main/nextflow/lineage/serde/LinSerializable.groovy new file mode 100644 index 0000000000..fabb0bded9 --- /dev/null +++ b/modules/nf-lineage/src/main/nextflow/lineage/serde/LinSerializable.groovy @@ -0,0 +1,29 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.lineage.serde + +import groovy.transform.CompileStatic +import nextflow.serde.JsonSerializable +/** + * Marker interface for lineage serializable objects + * + * @author Paolo Di Tommaso + */ +@CompileStatic +interface LinSerializable extends JsonSerializable { + +} diff --git a/modules/nf-lineage/src/resources/META-INF/extensions.idx b/modules/nf-lineage/src/resources/META-INF/extensions.idx new file mode 100644 index 0000000000..53c350a1be --- /dev/null +++ b/modules/nf-lineage/src/resources/META-INF/extensions.idx @@ -0,0 +1,19 @@ +# +# Copyright 2013-2025, Seqera Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +nextflow.lineage.DefaultLinStoreFactory +nextflow.lineage.LinObserverFactory +nextflow.lineage.cli.LinCommandImpl diff --git a/modules/nf-lineage/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider b/modules/nf-lineage/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider new file mode 100644 index 0000000000..4c5e0f213d --- /dev/null +++ b/modules/nf-lineage/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider @@ -0,0 +1,17 @@ +# +# Copyright 2013-2025, Seqera Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +nextflow.lineage.fs.LinFileSystemProvider diff --git a/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinHistoryLogTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinHistoryLogTest.groovy new file mode 100644 index 0000000000..bde00b0595 --- /dev/null +++ b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinHistoryLogTest.groovy @@ -0,0 +1,138 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.lineage + +import spock.lang.Specification + +import java.nio.file.Files +import java.nio.file.Path + +/** + * Lineage History file tests + * + * @author Jorge Ejarque + */ +class DefaultLinHistoryLogTest extends Specification { + + Path tempDir + Path historyFile + DefaultLinHistoryLog linHistoryLog + + def setup() { + tempDir = Files.createTempDirectory("wdir") + historyFile = tempDir.resolve("lin-history") + linHistoryLog = new DefaultLinHistoryLog(historyFile) + } + + def cleanup(){ + tempDir?.deleteDir() + } + + def "write should add a new file to the history folder"() { + given: + UUID sessionId = UUID.randomUUID() + String runName = "TestRun" + String runLid = "lid://123" + + when: + linHistoryLog.write(runName, sessionId, runLid) + + then: + def files = historyFile.listFiles() + files.size() == 1 + def parsedRecord = LinHistoryRecord.parse(files[0].text) + parsedRecord.sessionId == sessionId + parsedRecord.runName == runName + parsedRecord.runLid == runLid + } + + def "should return correct record for existing session"() { + given: + UUID sessionId = UUID.randomUUID() + String runName = "Run1" + String runLid = "lid://123" + + and: + linHistoryLog.write(runName, sessionId, runLid) + + when: + def record = linHistoryLog.getRecord(sessionId) + then: + record.sessionId == sessionId + record.runName == runName + record.runLid == runLid + } + + def "should return null and warn if session does not exist"() { + expect: + linHistoryLog.getRecord(UUID.randomUUID()) == null + } + + def "update should modify existing Lid for given session"() { + given: + UUID sessionId = UUID.randomUUID() + String runName = "Run1" + String runLidUpdated = "run-lid-updated" + + and: + linHistoryLog.write(runName, sessionId, 'run-lid-initial') + + when: + linHistoryLog.updateRunLid(sessionId, runLidUpdated) + + then: + def files = historyFile.listFiles() + 
files.size() == 1 + def parsedRecord = LinHistoryRecord.parse(files[0].text) + parsedRecord.runLid == runLidUpdated + } + + def "update should do nothing if session does not exist"() { + given: + UUID existingSessionId = UUID.randomUUID() + UUID nonExistingSessionId = UUID.randomUUID() + String runName = "Run1" + String runLid = "lid://123" + and: + linHistoryLog.write(runName, existingSessionId, runLid) + + when: + linHistoryLog.updateRunLid(nonExistingSessionId, "new-lid") + then: + def files = historyFile.listFiles() + files.size() == 1 + def parsedRecord = LinHistoryRecord.parse(files[0].text) + parsedRecord.runLid == runLid + } + + def 'should get records' () { + given: + UUID sessionId = UUID.randomUUID() + String runName = "Run1" + String runLid = "lid://123" + and: + linHistoryLog.write(runName, sessionId, runLid) + + when: + def records = linHistoryLog.getRecords() + then: + records.size() == 1 + records[0].sessionId == sessionId + records[0].runName == runName + records[0].runLid == runLid + } +} + diff --git a/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreFactoryTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreFactoryTest.groovy new file mode 100644 index 0000000000..53d031703a --- /dev/null +++ b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreFactoryTest.groovy @@ -0,0 +1,49 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.lineage + +import nextflow.lineage.config.LineageConfig +import spock.lang.Specification +import spock.lang.Unroll + +/** + * + * @author Paolo Di Tommaso + */ +class DefaultLinStoreFactoryTest extends Specification { + + @Unroll + def 'should validate can open' () { + given: + def factory = new DefaultLinStoreFactory() + def config = new LineageConfig(CONFIG) + + expect: + factory.canOpen(config) == EXPECTED + + where: + EXPECTED | CONFIG + true | [:] + true | [store:[location:'/some/path']] + true | [store:[location:'some/rel/path']] + true | [store:[location:'file:/this/that']] + true | [store:[location:'s3://some/path']] + false | [store:[location:'http://some/path']] + false | [store:[location:'jdbc:foo']] + } + +} diff --git a/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy new file mode 100644 index 0000000000..db135923d4 --- /dev/null +++ b/modules/nf-lineage/src/test/nextflow/lineage/DefaultLinStoreTest.groovy @@ -0,0 +1,139 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package nextflow.lineage
+
+import nextflow.lineage.model.Annotation
+
+import java.nio.file.Files
+import java.nio.file.Path
+import java.time.Instant
+import java.time.OffsetDateTime
+import java.time.ZoneOffset
+
+import nextflow.lineage.model.Checksum
+import nextflow.lineage.model.DataPath
+import nextflow.lineage.model.FileOutput
+import nextflow.lineage.model.Parameter
+import nextflow.lineage.model.Workflow
+import nextflow.lineage.model.WorkflowRun
+import nextflow.lineage.serde.LinEncoder
+import nextflow.lineage.config.LineageConfig
+import spock.lang.Specification
+import spock.lang.TempDir
+
+/**
+ * Exercises open/save/load/search on {@link DefaultLinStore} against a store created under a per-test temp directory.
+ * @author Jorge Ejarque
+ */
+class DefaultLinStoreTest extends Specification {
+
+    @TempDir
+    Path tempDir
+
+    Path storeLocation
+    Path metaLocation
+    LineageConfig config
+
+    def setup() { // builds a config whose store and log locations both live under tempDir
+        storeLocation = tempDir.resolve("store")
+        metaLocation = storeLocation.resolve(".meta") // path the features below expect entries to be written to
+        def configMap = [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]]
+        config = new LineageConfig(configMap)
+    }
+
+    def 'should open store'() { // open() must expose the metadata path and a DefaultLinHistoryLog
+        given:
+        def store = new DefaultLinStore()
+        when:
+        store.open(config)
+        def historyLog = store.getHistoryLog()
+        then:
+        store.getMetadataPath() == metaLocation
+        historyLog != null
+        historyLog instanceof DefaultLinHistoryLog
+    }
+
+    def "save should store value in the correct file location"() {
+        given:
+        def key = "testKey"
+        def value = new FileOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid://task", 1234)
+        def lidStore = new DefaultLinStore()
+        lidStore.open(config)
+
+        when:
+        lidStore.save(key, value)
+
+        then:
+        def filePath = metaLocation.resolve("$key/.data.json") // expected layout: <meta>/<key>/.data.json
+        Files.exists(filePath)
+        filePath.text == new LinEncoder().encode(value) // file content must equal the encoder output for the value
+    }
+
+    def "load should retrieve stored value correctly"() {
+        given:
+        def key = "testKey"
+        def value = new FileOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"), "lid://source", "lid://workflow", "lid://task", 1234)
+        def lidStore = new DefaultLinStore()
+        lidStore.open(config)
+        lidStore.save(key, value)
+
+        expect:
+        lidStore.load(key).toString() == value.toString() // round trip is compared via toString(), not equals()
+    }
+
+    def "load should return null if key does not exist"() {
+        given:
+        def lidStore = new DefaultLinStore()
+        lidStore.open(config)
+
+        expect:
+        lidStore.load("nonexistentKey") == null
+    }
+
+    def 'should query' () { // search() must return only the FileOutput entries carrying annotation key2=value2
+        given:
+        def uniqueId = UUID.randomUUID()
+        def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(1234567), ZoneOffset.UTC)
+        def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard"))
+        def workflow = new Workflow([mainScript],"https://nextflow.io/nf-test/", "123456" )
+        def key = "testKey"
+        def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [ new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")] )
+        def key2 = "testKey2"
+        def value2 = new FileOutput("/path/tp/file1", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key1","value1"), new Annotation("key2","value2")])
+        def key3 = "testKey3"
+        def value3 = new FileOutput("/path/tp/file2", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key2","value2"), new Annotation("key3","value3")])
+        def key4 = "testKey4"
+        def value4 = new FileOutput("/path/tp/file", new Checksum("78910", "nextflow", "standard"), "testkey", "testkey", null, 1234, time, time, [new Annotation("key4","value4"), new Annotation("key3","value3")]) // has no key2 annotation, so it must not match
+
+        def lidStore = new DefaultLinStore()
+        lidStore.open(config)
+        lidStore.save(key, value1) // a WorkflowRun, excluded by the type=FileOutput filter
+        lidStore.save(key2, value2)
+        lidStore.save(key3, value3)
+        lidStore.save(key4, value4)
+
+        when:
+        def results = lidStore.search("type=FileOutput&annotations.key=key2&annotations.value=value2")
+        then:
+        results.size() == 2
+        results.keySet().containsAll([key2,key3])
+        results[key2] == value2
+        results[key3] == value3
+    }
+
+
+}
diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinHistoryRecordTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinHistoryRecordTest.groovy
new file mode 100644
index 0000000000..c874ed2b1f
--- /dev/null
+++ b/modules/nf-lineage/src/test/nextflow/lineage/LinHistoryRecordTest.groovy
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package nextflow.lineage
+
+import spock.lang.Specification
+
+/**
+ * Lineage History Record tests
+ * Covers LinHistoryRecord.parse (invalid and 4-column input) and the tab-separated toString() form.
+ * @author Jorge Ejarque
+ */
+class LinHistoryRecordTest extends Specification {
+    def "LinHistoryRecord parse should throw for invalid record"() { // a line without tab-separated columns is rejected
+        when:
+        LinHistoryRecord.parse("invalid-record")
+
+        then:
+        thrown(IllegalArgumentException)
+    }
+
+    def "LinHistoryRecord parse should handle 4-column record"() {
+        given:
+        def timestamp = new Date()
+        def formattedTimestamp = LinHistoryRecord.TIMESTAMP_FMT.format(timestamp)
+        def line = "${formattedTimestamp}\trun-1\t${UUID.randomUUID()}\tlid://123" // columns: timestamp, run name, a UUID, run lid
+
+        when:
+        def record = LinHistoryRecord.parse(line)
+
+        then:
+        record.timestamp != null // only presence is checked, not the parsed value
+        record.runName == "run-1"
+        record.runLid == "lid://123" // NOTE(review): the UUID column is parsed but never asserted here
+    }
+
+    def "LinHistoryRecord toString should produce tab-separated format"() {
+        given:
+        UUID sessionId = UUID.randomUUID()
+        def record = new LinHistoryRecord(new Date(), "TestRun", sessionId, "lid://123")
+
+        when:
+        def line = record.toString()
+
+        then:
+        line.contains("\t")
+        line.split("\t").size() == 4 // one column per constructor argument
+    }
+}
diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy
new file mode 100644
index 0000000000..349c36889a
--- /dev/null
+++ b/modules/nf-lineage/src/test/nextflow/lineage/LinObserverTest.groovy
@@ -0,0 +1,561 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package nextflow.lineage
+
+import nextflow.lineage.model.Parameter
+import nextflow.lineage.model.TaskOutput
+import nextflow.file.FileHolder
+import nextflow.processor.TaskHandler
+import nextflow.script.TokenVar
+import nextflow.script.params.EnvOutParam
+import nextflow.script.params.FileInParam
+import nextflow.script.params.FileOutParam
+import nextflow.script.params.InParam
+import nextflow.script.params.OutParam
+import nextflow.script.params.StdInParam
+import nextflow.script.params.StdOutParam
+import nextflow.script.params.ValueInParam
+import nextflow.script.params.ValueOutParam
+import spock.lang.Shared
+
+import static nextflow.lineage.fs.LinPath.*
+
+import java.nio.file.Files
+import java.nio.file.Path
+import java.nio.file.attribute.BasicFileAttributes
+
+import com.google.common.hash.HashCode
+import nextflow.Session
+import nextflow.lineage.model.Checksum
+import nextflow.lineage.model.FileOutput
+import nextflow.lineage.model.DataPath
+import nextflow.lineage.model.Workflow
+import nextflow.lineage.model.WorkflowOutput
+import nextflow.lineage.model.WorkflowRun
+import nextflow.lineage.serde.LinEncoder
+import nextflow.lineage.config.LineageConfig
+import nextflow.processor.TaskConfig
+import nextflow.processor.TaskId
+import nextflow.processor.TaskProcessor
+import nextflow.processor.TaskRun
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.WorkflowMetadata
+import nextflow.util.CacheHelper
+import nextflow.util.PathNormalizer
+import spock.lang.Specification
+import spock.lang.Unroll
+
+/**
+ * Drives {@link LinObserver} with mocked Session/TaskRun objects and checks what ends up in a {@link DefaultLinStore}.
+ * @author Paolo Di Tommaso
+ */
+class LinObserverTest extends Specification {
+    @Shared
+    Path lidFolder = Files.createTempDirectory("lid") // shared store location for the data-driven features; removed in cleanupSpec
+    def cleanupSpec(){
+        lidFolder.deleteDir()
+    }
+
+    def 'should normalize paths' (){ // getNormalizedParams keeps the map order and reports name, type and value per entry
+        given:
+        def folder = Files.createTempDirectory('test')
+        def workDir = folder.resolve("workDir")
+        def projectDir = folder.resolve("projectDir")
+        def metadata = Mock(WorkflowMetadata){
+            getRepository() >> "https://nextflow.io/nf-test/"
+            getCommitId() >> "123456"
+            getScriptId() >> "78910"
+            getProjectDir() >> projectDir
+            getWorkDir() >> workDir
+        }
+        def params = [path: workDir.resolve("path/file.txt"), sequence: projectDir.resolve("file2.txt").toString(), value: 12]
+        when:
+        def results = LinObserver.getNormalizedParams(params, new PathNormalizer(metadata))
+        then:
+        results.size() == 3
+        results.get(0).name == "path"
+        results.get(0).type == Path.simpleName
+        results.get(0).value == "work/path/file.txt" // the Path under workDir is expected back with a "work/" prefix
+        results.get(1).name == "sequence"
+        results.get(1).type == "String"
+        results.get(1).value == projectDir.resolve("file2.txt").toString() // a String value is expected back unchanged
+        results.get(2).name == "value"
+        results.get(2).type == "Integer"
+        results.get(2).value == 12
+
+        cleanup:
+        ScriptMeta.reset()
+        folder?.deleteDir()
+    }
+    def 'should collect script files' () { // main script takes the metadata scriptId as checksum; modules are hashed from their text
+        given:
+        def folder = Files.createTempDirectory('test')
+        and:
+        def config = [workflow:[lineage:[enabled: true, store:[location:folder.toString()]]]] // NOTE(review): features that assert on files under `folder` use a top-level `lineage` key instead; confirm LineageConfig.create honours `workflow.lineage`
+        def store = new DefaultLinStore();
+        def uniqueId = UUID.randomUUID()
+        def scriptFile = folder.resolve("main.nf")
+        def module1 = folder.resolve("script1.nf"); module1.text = 'hola'
+        def module2 = folder.resolve("script2.nf"); module2.text = 'world'
+        and:
+
+        def metadata = Mock(WorkflowMetadata){
+            getRepository() >> "https://nextflow.io/nf-test/"
+            getCommitId() >> "123456"
+            getScriptId() >> "78910"
+            getScriptFile() >> scriptFile
+            getProjectDir() >> folder.resolve("projectDir")
+            getWorkDir() >> folder.resolve("workDir")
+        }
+        def session = Mock(Session) {
+            getConfig() >> config
+            getUniqueId() >> uniqueId
+            getRunName() >> "test_run"
+            getWorkflowMetadata() >> metadata
+            getParams() >> new ScriptBinding.ParamsMap()
+        }
+        store.open(LineageConfig.create(session))
+        def observer = Spy(new LinObserver(session, store)) // Spy so allScriptFiles() can be stubbed in the then: block
+
+        when:
+        def files = observer.collectScriptDataPaths(new PathNormalizer(metadata))
+        then:
+        observer.allScriptFiles() >> [ scriptFile, module1, module2 ]
+        and:
+        files.size() == 3
+        and:
+        files[0].path == "file://${scriptFile.toString()}"
+        files[0].checksum == new Checksum("78910", "nextflow", "standard")
+        and:
+        files[1].path == "file://$module1"
+        files[1].checksum == Checksum.ofNextflow(module1.text)
+        and:
+        files[2].path == "file://$module2"
+        files[2].checksum == Checksum.ofNextflow(module2.text)
+
+        cleanup:
+        ScriptMeta.reset()
+        folder?.deleteDir()
+    }
+
+    def 'should save workflow' (){ // onFlowCreate + onFlowBegin must persist the encoded WorkflowRun under the execution hash
+        given:
+        def folder = Files.createTempDirectory('test')
+        def config = [lineage:[enabled: true, store:[location:folder.toString()]]]
+        def store = new DefaultLinStore();
+        def uniqueId = UUID.randomUUID()
+        def scriptFile = folder.resolve("main.nf")
+        def metadata = Mock(WorkflowMetadata){
+            getRepository() >> "https://nextflow.io/nf-test/"
+            getCommitId() >> "123456"
+            getScriptId() >> "78910"
+            getScriptFile() >> scriptFile
+            getProjectDir() >> folder.resolve("projectDir")
+            getWorkDir() >> folder.resolve("workDir")
+        }
+        def session = Mock(Session) {
+            getConfig() >> config
+            getUniqueId() >> uniqueId
+            getRunName() >> "test_run"
+            getWorkflowMetadata() >> metadata
+            getParams() >> new ScriptBinding.ParamsMap()
+        }
+        store.open(LineageConfig.create(session))
+        def observer = new LinObserver(session, store)
+        def mainScript = new DataPath("file://${scriptFile.toString()}", new Checksum("78910", "nextflow", "standard"))
+        def workflow = new Workflow([mainScript],"https://nextflow.io/nf-test/", "123456" )
+        def workflowRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [], config) // expected record, including the session config map
+        when:
+        observer.onFlowCreate(session)
+        observer.onFlowBegin()
+        then:
+        folder.resolve(".meta/${observer.executionHash}/.data.json").text == new LinEncoder().encode(workflowRun)
+
+        cleanup:
+        folder?.deleteDir()
+    }
+
+    def 'should get parameter type' () { // maps process param objects and plain values to their type label
+        expect:
+        LinObserver.getParameterType(PARAM) == STRING
+        where:
+        PARAM | STRING
+        new FileInParam(null, []) | "path"
+        new ValueOutParam(null, []) | "val"
+        new EnvOutParam(null, []) | "env"
+        new StdInParam(null, []) | "stdin"
+        new StdOutParam(null, []) | "stdout"
+        Path.of("test") | "Path"
+        ["test"] | "Collection"
+        [key:"value"] | "Map"
+    }
+
+    def 'should save task run' () { // onProcessComplete must store the task run, one FileOutput per output file and a TaskOutput
+        given:
+        def folder = Files.createTempDirectory('test').toRealPath()
+        def config = [workflow:[lineage:[enabled: true, store:[location:folder.toString()]]]] // NOTE(review): `workflow.lineage` key again; results are read back through the same store instance, so a default location would go unnoticed
+        def uniqueId = UUID.randomUUID()
+        def workDir = folder.resolve("work")
+        def session = Mock(Session) {
+            getConfig()>>config
+            getUniqueId()>>uniqueId
+            getRunName()>>"test_run"
+            getWorkDir() >> workDir
+        }
+        def metadata = Mock(WorkflowMetadata){
+            getRepository() >> "https://nextflow.io/nf-test/"
+            getCommitId() >> "123456"
+            getScriptId() >> "78910"
+            getProjectDir() >> folder.resolve("projectDir")
+            getWorkDir() >> workDir
+        }
+        and:
+        def store = new DefaultLinStore();
+        store.open(LineageConfig.create(session))
+        and:
+        def observer = new LinObserver(session, store)
+        def normalizer = new PathNormalizer(metadata)
+        observer.executionHash = "hash" // set directly because onFlowCreate/onFlowBegin are not invoked in this feature
+        observer.normalizer = normalizer
+        and:
+        def hash = HashCode.fromString("1234567890")
+        def taskWd = workDir.resolve('12/34567890')
+        Files.createDirectories(taskWd)
+        and:
+        def processor = Mock(TaskProcessor){
+            getTaskGlobalVars(_) >> [:]
+            getTaskBinEntries(_) >> []
+        }
+
+        and: 'Task Inputs'
+        def inputs = new LinkedHashMap<InParam, Object>()
+        // File from task
+        inputs.put(new FileInParam(null, []).bind("file1"), [new FileHolder(workDir.resolve('78/567890/file1.txt'))])
+        // Normal file
+        def file = folder.resolve("file2.txt")
+        file.text = "this is a test file"
+        def fileHash = CacheHelper.hasher(file).hash().toString()
+        inputs.put(new FileInParam(null, []).bind("file2"), [new FileHolder(file)])
+        //Value input
+        inputs.put(new ValueInParam(null, []).bind("id"), "value")
+
+        and: 'Task Outputs'
+        def outputs = new LinkedHashMap<OutParam, Object>()
+        // Single Path output
+        def outFile1 = taskWd.resolve('fileOut1.txt')
+        outFile1.text = 'some data'
+        def fileHash1 = CacheHelper.hasher(outFile1).hash().toString()
+        def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes)
+        outputs.put(new FileOutParam(null, []).bind(new TokenVar("file1")), outFile1)
+        // Collection Path output
+        def outFile2 = taskWd.resolve('fileOut2.txt')
+        outFile2.text = 'some other data'
+        def fileHash2 = CacheHelper.hasher(outFile2).hash().toString()
+        def attrs2 = Files.readAttributes(outFile2, BasicFileAttributes)
+        outputs.put(new FileOutParam(null, []).bind(new TokenVar("file2")), [outFile2])
+        outputs.put(new ValueOutParam(null, []).bind(new TokenVar("id")), "value")
+
+        and: 'Task description'
+        def task = Mock(TaskRun) {
+            getId() >> TaskId.of(100)
+            getName() >> 'foo'
+            getHash() >> hash
+            getProcessor() >> processor
+            getSource() >> 'echo task source'
+            getScript() >> 'this is the script'
+            getInputs() >> inputs
+            getOutputs() >> outputs
+            getWorkDir() >> taskWd
+        }
+        def handler = Mock(TaskHandler){
+            getTask() >> task
+        }
+
+        and: 'Expected LID objects'
+        def sourceHash = CacheHelper.hasher('echo task source').hash().toString()
+        def script = 'this is the script'
+        def taskDescription = new nextflow.lineage.model.TaskRun(uniqueId.toString(), "foo",
+            new Checksum(sourceHash, "nextflow", "standard"),
+            script,
+            [
+                new Parameter("path", "file1", ['lid://78567890/file1.txt']),
+                new Parameter("path", "file2", [[path: normalizer.normalizePath(file), checksum: [value:fileHash, algorithm: "nextflow", mode: "standard"]]]),
+                new Parameter("val", "id", "value")
+            ], null, null, null, null, [:], [], "lid://hash", null)
+        def dataOutput1 = new FileOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"),
+            "lid://1234567890", "lid://hash", "lid://1234567890", attrs1.size(), LinUtils.toDate(attrs1.creationTime()), LinUtils.toDate(attrs1.lastModifiedTime()) )
+        def dataOutput2 = new FileOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"),
+            "lid://1234567890", "lid://hash", "lid://1234567890", attrs2.size(), LinUtils.toDate(attrs2.creationTime()), LinUtils.toDate(attrs2.lastModifiedTime()) )
+
+        when:
+        observer.onProcessComplete(handler, null )
+        def taskRunResult = store.load("${hash.toString()}")
+        def dataOutputResult1 = store.load("${hash}/fileOut1.txt") as FileOutput
+        def dataOutputResult2 = store.load("${hash}/fileOut2.txt") as FileOutput
+        def taskOutputsResult = store.load("${hash}#output") as TaskOutput
+        then:
+        taskRunResult == taskDescription
+        dataOutputResult1 == dataOutput1
+        dataOutputResult2 == dataOutput2
+        taskOutputsResult.taskRun == "lid://1234567890"
+        taskOutputsResult.workflowRun == "lid://hash"
+        taskOutputsResult.output.size() == 3
+        taskOutputsResult.output.get(0).type == "path"
+        taskOutputsResult.output.get(0).name == "file1"
+        taskOutputsResult.output.get(0).value == "lid://1234567890/fileOut1.txt" // single Path output is stored as a single lid string
+        taskOutputsResult.output.get(1).type == "path"
+        taskOutputsResult.output.get(1).name == "file2"
+        taskOutputsResult.output.get(1).value == ["lid://1234567890/fileOut2.txt"] // collection output is stored as a list of lid strings
+        taskOutputsResult.output.get(2).type == "val"
+        taskOutputsResult.output.get(2).name == "id"
+        taskOutputsResult.output.get(2).value == "value"
+
+        cleanup:
+        folder?.deleteDir()
+    }
+
+    def 'should save task data output' () { // storeTaskOutput writes <hash>/<path relative to the task work dir>/.data.json
+        given:
+        def folder = Files.createTempDirectory('test')
+        def config = [lineage:[enabled: true, store:[location:folder.toString()]]]
+        def store = new DefaultLinStore();
+        def session = Mock(Session) {
+            getConfig()>>config
+        }
+        store.open(LineageConfig.create(session))
+        def observer = Spy(new LinObserver(session, store))
+        observer.executionHash = "hash"
+        and:
+        def workDir = folder.resolve('12/34567890')
+        Files.createDirectories(workDir)
+        and:
+        def outFile = workDir.resolve('foo/bar/file.bam')
+        Files.createDirectories(outFile.parent)
+        outFile.text = 'some data'
+        def fileHash = CacheHelper.hasher(outFile).hash().toString()
+        and:
+        def hash = HashCode.fromInt(123456789) // the expected lids below spell this hash as "15cd5b07"
+        and:
+        def task = Mock(TaskRun) {
+            getId() >> TaskId.of(100)
+            getName() >> 'foo'
+            getHash() >> hash
+            getWorkDir() >> workDir
+        }
+        and:
+        def attrs = Files.readAttributes(outFile, BasicFileAttributes)
+        def output = new FileOutput(outFile.toString(), new Checksum(fileHash, "nextflow", "standard"),
+            "lid://15cd5b07", "lid://hash", "lid://15cd5b07", attrs.size(), LinUtils.toDate(attrs.creationTime()), LinUtils.toDate(attrs.lastModifiedTime()) )
+        and:
+        observer.readAttributes(outFile) >> attrs // stubbed so expected and stored records share the same attribute snapshot
+
+        when:
+        observer.storeTaskOutput(task, outFile)
+        then:
+        folder.resolve(".meta/${hash}/foo/bar/file.bam/.data.json").text == new LinEncoder().encode(output)
+
+        cleanup:
+        folder?.deleteDir()
+    }
+
+    def 'should relativise task output dirs' (){ // getTaskRelative strips the task work dir or storeDir prefix from PATH
+        when:
+        def config = [workflow:[lineage:[enabled: true, store:[location:lidFolder.toString()]]]] // NOTE(review): `workflow.lineage` key; confirm it is still read
+        def store = new DefaultLinStore();
+        def session = Mock(Session) {
+            getConfig()>>config
+        }
+        def hash = HashCode.fromInt(123456789)
+        def taskConfig = Mock(TaskConfig){
+            getStoreDir() >> STORE_DIR
+        }
+        def task = Mock(TaskRun) {
+            getId() >> TaskId.of(100)
+            getName() >> 'foo'
+            getHash() >> hash
+            getWorkDir() >> WORK_DIR
+            getConfig() >> taskConfig
+        }
+        store.open(LineageConfig.create(session))
+        def observer = new LinObserver(session, store)
+        then:
+        observer.getTaskRelative(task, PATH) == EXPECTED
+        where:
+        WORK_DIR | STORE_DIR | PATH | EXPECTED
+        Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('/path/to/work/12/3456789/relative') | "relative"
+        Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('/path/to/storeDir/relative') | "relative"
+        Path.of('work/12/3456789') | Path.of('storeDir') | Path.of('work/12/3456789/relative') | "relative"
+        Path.of('work/12/3456789') | Path.of('storeDir') | Path.of('storeDir/relative') | "relative"
+        Path.of('work/12/3456789') | Path.of('storeDir') | Path.of('results/relative') | "results/relative" // relative path outside both dirs is returned as given
+        Path.of('/path/to/work/12/3456789') | Path.of('storeDir') | Path.of('./relative') | "relative" // leading "./" is dropped
+    }
+
+    @Unroll
+    def 'should return exception when relativize task output dirs'() { // PATH outside work dir and storeDir must be rejected
+        when:
+        def config = [workflow:[lineage:[enabled: true, store:[location:lidFolder.toString()]]]]
+        def store = new DefaultLinStore();
+        def session = Mock(Session) {
+            getConfig()>>config
+        }
+        def hash = HashCode.fromInt(123456789)
+        def taskConfig = Mock(TaskConfig){
+            getStoreDir() >> STORE_DIR
+        }
+        def task = Mock(TaskRun) {
+            getId() >> TaskId.of(100)
+            getName() >> 'foo'
+            getHash() >> hash
+            getWorkDir() >> WORK_DIR
+            getConfig() >> taskConfig
+        }
+        store.open(LineageConfig.create(session))
+        def observer = new LinObserver(session, store)
+        observer.getTaskRelative(task, PATH)
+        then:
+        def e = thrown(IllegalArgumentException)
+        e.message == "Cannot access the relative path for output '$PATH' and task '${task.name}'".toString()
+
+        where:
+        WORK_DIR | STORE_DIR | PATH
+        Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('/another/path/relative')
+        Path.of('/path/to/work/12/3456789') | Path.of('/path/to/storeDir') | Path.of('../path/to/storeDir/relative')
+    }
+
+    def 'should relativize workflow output dirs' (){ // getWorkflowRelative strips the session output dir prefix from PATH
+        when:
+        def config = [workflow:[lineage:[enabled: true, store:[location:lidFolder.toString()]]]]
+        def store = new DefaultLinStore();
+        def session = Mock(Session) {
+            getOutputDir()>>OUTPUT_DIR
+            getConfig()>>config
+        }
+        store.open(LineageConfig.create(session))
+        def observer = new LinObserver(session, store)
+        then:
+        observer.getWorkflowRelative(PATH) == EXPECTED
+        where:
+        OUTPUT_DIR | PATH | EXPECTED
+        Path.of('/path/to/outDir') | Path.of('/path/to/outDir/relative') | "relative"
+        Path.of('outDir') | Path.of('outDir/relative') | "relative"
+        Path.of('/path/to/outDir') | Path.of('results/relative') | "results/relative"
+        Path.of('/path/to/outDir') | Path.of('./relative') | "relative"
+    }
+
+    @Unroll
+    def 'should return exception when relativize workflow output dirs' (){
+        when:
+        def config = [workflow:[lineage:[enabled: true, store:[location:lidFolder.toString()]]]]
+        def store = new DefaultLinStore(); // NOTE(review): unlike the sibling features, store.open(...) is never called here
+        def session = Mock(Session) {
+            getOutputDir()>>OUTPUT_DIR
+            getConfig()>>config
+        }
+        def observer = new LinObserver(session, store)
+        observer.getWorkflowRelative(PATH)
+        then:
+        def e = thrown(IllegalArgumentException)
+        e.message == "Cannot access relative path for workflow output '$PATH'"
+        where:
+        OUTPUT_DIR | PATH | EXPECTED // NOTE(review): EXPECTED is not referenced anywhere in this feature
+        Path.of('/path/to/outDir') | Path.of('/another/path/') | "relative"
+        Path.of('/path/to/outDir') | Path.of('../relative') | "relative"
+    }
+
+    def 'should save workflow output'() { // end-to-end: flow start, two publishes (with and without source), flow completion
+        given:
+        def folder = Files.createTempDirectory('test')
+        def config = [lineage:[enabled: true, store:[location:folder.toString()]]]
+        def store = new DefaultLinStore();
+        def outputDir = folder.resolve('results')
+        def uniqueId = UUID.randomUUID()
+        def scriptFile = folder.resolve("main.nf")
+        def workDir= folder.resolve("work")
+        def metadata = Mock(WorkflowMetadata){
+            getRepository() >> "https://nextflow.io/nf-test/"
+            getCommitId() >> "123456"
+            getScriptId() >> "78910"
+            getScriptFile() >> scriptFile
+            getProjectDir() >> folder.resolve("projectDir")
+            getWorkDir() >> workDir
+        }
+        def session = Mock(Session) {
+            getConfig()>>config
+            getOutputDir()>>outputDir
+            getWorkDir() >> workDir
+            getWorkflowMetadata()>>metadata
+            getUniqueId()>>uniqueId
+            getRunName()>>"test_run"
+            getParams() >> new ScriptBinding.ParamsMap()
+        }
+        store.open(LineageConfig.create(session))
+        def observer = new LinObserver(session, store)
+        def encoder = new LinEncoder()
+
+        when: 'Starting workflow'
+        observer.onFlowCreate(session)
+        observer.onFlowBegin()
+        then: 'History file should contain execution hash'
+        def lid = store.getHistoryLog().getRecord(uniqueId).runLid.substring(LID_PROT.size()) // strip the lid protocol prefix to get the bare hash
+        lid == observer.executionHash
+
+        when: ' publish output with source file'
+        def outFile1 = outputDir.resolve('foo/file.bam')
+        Files.createDirectories(outFile1.parent)
+        outFile1.text = 'some data1'
+        def sourceFile1 = workDir.resolve('12/3987/file.bam')
+        Files.createDirectories(sourceFile1.parent)
+        sourceFile1.text = 'some data1'
+        observer.onFilePublish(outFile1, sourceFile1)
+        observer.onWorkflowPublish("a", outFile1)
+
+        then: 'check file 1 output metadata in lid store'
+        def attrs1 = Files.readAttributes(outFile1, BasicFileAttributes)
+        def fileHash1 = CacheHelper.hasher(outFile1).hash().toString()
+        def output1 = new FileOutput(outFile1.toString(), new Checksum(fileHash1, "nextflow", "standard"),
+            "lid://123987/file.bam", "$LID_PROT${observer.executionHash}", null,
+            attrs1.size(), LinUtils.toDate(attrs1.creationTime()), LinUtils.toDate(attrs1.lastModifiedTime()) ) // source lid is expected to point at the file in the task work dir
+        folder.resolve(".meta/${observer.executionHash}/foo/file.bam/.data.json").text == encoder.encode(output1)
+
+        when: 'publish without source path'
+        def outFile2 = outputDir.resolve('foo/file2.bam')
+        Files.createDirectories(outFile2.parent)
+        outFile2.text = 'some data2'
+        def attrs2 = Files.readAttributes(outFile2, BasicFileAttributes)
+        def fileHash2 = CacheHelper.hasher(outFile2).hash().toString()
+        observer.onFilePublish(outFile2)
+        observer.onWorkflowPublish("b", outFile2)
+        then: 'Check outFile2 metadata in lid store'
+        def output2 = new FileOutput(outFile2.toString(), new Checksum(fileHash2, "nextflow", "standard"),
+            "lid://${observer.executionHash}" , "lid://${observer.executionHash}", null,
+            attrs2.size(), LinUtils.toDate(attrs2.creationTime()), LinUtils.toDate(attrs2.lastModifiedTime()) ) // without a source file the workflow run lid is expected as source
+        folder.resolve(".meta/${observer.executionHash}/foo/file2.bam/.data.json").text == encoder.encode(output2)
+
+        when: 'Workflow complete'
+        observer.onFlowComplete()
+        then: 'Check history file is updated and Workflow Result is written in the lid store'
+        def finalLid = store.getHistoryLog().getRecord(uniqueId).runLid.substring(LID_PROT.size())
+        def resultsRetrieved = store.load("${finalLid}#output") as WorkflowOutput
+        resultsRetrieved.output == [new Parameter(Path.simpleName, "a", "lid://${observer.executionHash}/foo/file.bam"), new Parameter(Path.simpleName, "b", "lid://${observer.executionHash}/foo/file2.bam")]
+
+        cleanup:
+        folder?.deleteDir()
+    }
+
+}
diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy
new file mode 100644
index 0000000000..70987aabcb
--- /dev/null
+++ b/modules/nf-lineage/src/test/nextflow/lineage/LinPropertyValidationTest.groovy
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package nextflow.lineage
+
+import spock.lang.Specification
+
+/** Checks that LinPropertyValidator.validate rejects unknown property names and accepts known ones.
+ * @author Jorge Ejarque
+ */
+class LinPropertyValidationTest extends Specification{
+
+    def 'should throw exception when property does not exist'(){
+        when:
+        new LinPropertyValidator().validate(['value', 'not_existing']) // one accepted name followed by an unknown one
+        then:
+        def e = thrown(IllegalArgumentException)
+        e.message.startsWith( "Property 'not_existing' doesn't exist in the lineage model") // only the message prefix is pinned
+    }
+
+    def 'should not throw exception when property exist'(){
+        when:
+        new LinPropertyValidator().validate(['value', 'output'])
+        then:
+        noExceptionThrown()
+    }
+}
diff --git a/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy
new file mode 100644
index 0000000000..47d1247c4a
--- /dev/null
+++ b/modules/nf-lineage/src/test/nextflow/lineage/LinUtilsTest.groovy
@@ -0,0 +1,241 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage
+
+import java.time.ZoneId
+
+import nextflow.lineage.model.Checksum
+import nextflow.lineage.model.DataPath
+import nextflow.lineage.model.Parameter
+import nextflow.lineage.model.Workflow
+import nextflow.lineage.model.WorkflowOutput
+import nextflow.lineage.model.WorkflowRun
+import nextflow.lineage.config.LineageConfig
+import spock.lang.Specification
+import spock.lang.TempDir
+
+import java.nio.file.Path
+import java.nio.file.attribute.FileTime
+import java.time.Instant
+import java.time.OffsetDateTime
+import java.time.ZoneOffset
+
+class LinUtilsTest extends Specification{ // covers the static helpers of LinUtils: date conversion, query, parsing, navigation
+
+    @TempDir
+    Path tempDir
+
+    Path storeLocation
+    LineageConfig config
+
+    def setup() { // config pointing the store at a per-test temp directory
+        storeLocation = tempDir.resolve("store")
+        def configMap = [enabled: true, store: [location: storeLocation.toString()]]
+        config = new LineageConfig(configMap)
+    }
+
+    def 'should convert to Date'(){
+        expect:
+        LinUtils.toDate(FILE_TIME) == DATE
+        where:
+        FILE_TIME | DATE
+        null | null
+        FileTime.fromMillis(1234) | Instant.ofEpochMilli(1234).atZone(ZoneId.systemDefault())?.toOffsetDateTime() // expected value is built in the system default zone
+    }
+
+    def 'should convert to FileTime'(){
+        expect:
+        LinUtils.toFileTime(DATE) == FILE_TIME
+        where:
+        FILE_TIME | DATE
+        null | null
+        FileTime.fromMillis(1234) | OffsetDateTime.ofInstant(Instant.ofEpochMilli(1234), ZoneOffset.UTC)
+    }
+
+
+    def 'should query'() { // query() resolves the lid URI key and then walks the fragment inside the stored object
+        given:
+        def uniqueId = UUID.randomUUID()
+        def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard"))
+        def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456")
+        def key = "testKey"
+        def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")])
+        def outputs1 = new WorkflowOutput(OffsetDateTime.now(), "lid://testKey", [new Parameter( "String", "output", "name")] )
+        def lidStore = new DefaultLinStore()
+        lidStore.open(config)
+        lidStore.save(key, value1)
+        lidStore.save("$key#output", outputs1) // outputs are saved under a separate "<key>#output" entry
+
+        when:
+        List<Object> params = LinUtils.query(lidStore, new URI('lid://testKey#params'))
+        then:
+        params.size() == 1
+        params[0] instanceof List<Parameter>
+        (params[0] as List<Parameter>).size() == 2
+
+        when:
+        List<Object> outputs = LinUtils.query(lidStore, new URI('lid://testKey#output'))
+        then:
+        outputs.size() == 1
+        outputs[0] instanceof List
+        def param = (outputs[0] as List)[0] as Parameter
+        param.name == "output"
+
+        when:
+        LinUtils.query(lidStore, new URI('lid://testKey#no-exist'))
+        then:
+        thrown(IllegalArgumentException)
+
+        when:
+        LinUtils.query(lidStore, new URI('lid://testKey#outputs.no-exist'))
+        then:
+        thrown(IllegalArgumentException)
+
+        when:
+        LinUtils.query(lidStore, new URI('lid://no-exist#something'))
+        then:
+        thrown(IllegalArgumentException)
+    }
+
+    def "should parse children elements form Fragment string"() { // NOTE(review): "form" in the feature name is presumably a typo for "from"
+        expect:
+        LinUtils.parseChildrenFromFragment(FRAGMENT) == EXPECTED as String[]
+
+        where:
+        FRAGMENT | EXPECTED
+        "workflow" | ["workflow"]
+        "workflow.repository" | ["workflow", "repository"]
+        null | []
+        "" | []
+    }
+
+    def "should parse a query string as Map"() {
+        expect:
+        LinUtils.parseQuery(QUERY_STRING) == EXPECTED
+
+        where:
+        QUERY_STRING | EXPECTED
+        "type=value1&taskRun=value2" | ["type": "value1", "taskRun": "value2"]
+        "type=val with space" | ["type": "val with space"]
+        "" | [:]
+        null | [:]
+    }
+
+    def "should check params in an object"() { // dotted keys descend into nested maps; for a list, one matching element is enough
+        given:
+        def obj = [ "type": "value", "workflow": ["repository": "subvalue"], "output" : [ ["path":"/to/file"],["path":"file2"] ] ]
+
+        expect:
+        LinUtils.checkParams(obj, PARAMS) == EXPECTED
+
+        where:
+        PARAMS | EXPECTED
+        ["type": "value"] | true
+        ["type": "wrong"] | false
+        ["workflow.repository": "subvalue"] | true
+        ["workflow.repository": "wrong"] | false
+        ["output.path": "wrong"] | false
+        ["output.path": "/to/file"] | true
+        ["output.path": "file2"] | true
+
+    }
+
+    def 'should parse query' (){ // NOTE(review): exercises the same parseQuery method as "should parse a query string as Map"
+        expect:
+        LinUtils.parseQuery(PARAMS) == EXPECTED
+        where:
+        PARAMS | EXPECTED
+        "type=value" | ["type": "value"]
+        "workflow.repository=subvalue" | ["workflow.repository": "subvalue"]
+        "" | [:]
+        null | [:]
+    }
+
+    def "should navigate in object params"() {
+        given:
+        def obj = [
+            "key1": "value1",
+            "nested": [
+                "subkey": "subvalue"
+            ]
+        ]
+
+        expect:
+        LinUtils.navigate(obj, PATH) == EXPECTED
+
+        where:
+        PATH | EXPECTED
+        "key1" | "value1"
+        "nested.subkey" | "subvalue"
+        "wrongKey" | null
+    }
+
+    def "should add objects matching parameters"() { // treatObject appends to `results` the object, or each list element, that satisfies PARAMS
+        given:
+        def results = []
+
+        when:
+        LinUtils.treatObject(OBJECT, PARAMS, results)
+
+        then:
+        results == EXPECTED
+
+        where:
+        OBJECT | PARAMS | EXPECTED
+        ["field": "value"] | ["field": "value"] | [["field": "value"]]
+        ["field": "wrong"] | ["field": "value"] | []
+        [["field": "value"], ["field": "x"]] | ["field": "value"] | [["field": "value"]]
+        "string" | [:] | ["string"]
+        ["nested": ["subfield": "match"]] | ["nested.subfield": "match"] | [["nested": ["subfield": "match"]]]
+        ["nested": ["subfield": "nomatch"]] | ["nested.subfield": "match"] | []
+        [["nested": ["subfield": "match"]], ["nested": ["subfield": "other"]]] | ["nested.subfield": "match"] | [["nested": ["subfield": "match"]]]
+    }
+
+    def "Should search path"() { // searchPath loads `key`, descends into "params" and keeps entries whose name is "param1"
+        given:
+        def uniqueId = UUID.randomUUID()
+        def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard"))
+        def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456")
+        def key = "testKey"
+        def value1 = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")])
+        def lidStore = new DefaultLinStore()
+        lidStore.open(config)
+        lidStore.save(key, value1)
+        when:
+        def result = LinUtils.searchPath(lidStore, key, ["name":"param1"], ["params"] as String[])
+
+        then:
+        result == [new Parameter("String", "param1", "value1")]
+    }
+
+    def 'should navigate' (){ // navigate() on a model object: missing segments and a null root both give null
+        def uniqueId = UUID.randomUUID()
+        def mainScript = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard"))
+        def workflow = new Workflow([mainScript], "https://nextflow.io/nf-test/", "123456")
+        def wfRun = new WorkflowRun(workflow, uniqueId.toString(), "test_run", [new Parameter("String", "param1", [key: "value1"]), new Parameter("String", "param2", "value2")])
+
+        expect:
+        LinUtils.navigate(wfRun, "workflow.commitId") == "123456"
+        LinUtils.navigate(wfRun, "params.name") == ["param1", "param2"] // a segment applied to a list is collected across its elements
+        LinUtils.navigate(wfRun, "params.value.key") == "value1"
+        LinUtils.navigate(wfRun, "params.value.no-exist") == null
+        LinUtils.navigate(wfRun, "params.no-exist") == null
+        LinUtils.navigate(wfRun, "no-exist") == null
+        LinUtils.navigate(null, "something") == null
+    }
+
+}
diff --git a/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy
new file mode 100644
index 0000000000..ce17b89177
--- /dev/null
+++ b/modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy
@@ -0,0 +1,467 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package nextflow.lineage.cli + +import nextflow.SysEnv +import nextflow.config.ConfigMap +import nextflow.dag.MermaidHtmlRenderer +import nextflow.lineage.LinHistoryRecord +import nextflow.lineage.LinStoreFactory +import nextflow.lineage.DefaultLinHistoryLog +import nextflow.lineage.model.Checksum +import nextflow.lineage.model.FileOutput +import nextflow.lineage.model.DataPath +import nextflow.lineage.model.Parameter +import nextflow.lineage.model.TaskRun +import nextflow.lineage.model.Workflow +import nextflow.lineage.model.WorkflowRun +import nextflow.lineage.serde.LinEncoder +import nextflow.plugin.Plugins +import org.junit.Rule +import spock.lang.Specification +import spock.lang.TempDir +import test.OutputCapture +import java.nio.file.Files +import java.nio.file.Path +import java.time.Instant +import java.time.OffsetDateTime +import java.time.ZoneOffset + +class LinCommandImplTest extends Specification{ + + @TempDir + Path tmpDir + + Path storeLocation + ConfigMap configMap + + def setup() { + // clear the environment to avoid the local env pollute the test env + SysEnv.push([:]) + storeLocation = tmpDir.resolve("store") + configMap = new ConfigMap([lineage: [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]]]) + } + + def cleanup() { + Plugins.stop() + LinStoreFactory.reset() + SysEnv.pop() + } + + def setupSpec() { + LinStoreFactory.reset() + } + /* + * Read more http://mrhaki.blogspot.com.es/2015/02/spocklight-capture-and-assert-system.html + */ + @Rule + OutputCapture capture = new OutputCapture() + + def 'should print executions lids' (){ + given: + def historyFile = storeLocation.resolve(".meta/.history") + def lidLog = new DefaultLinHistoryLog(historyFile) + def uniqueId = UUID.randomUUID() + def date = new Date(); + def recordEntry = "${LinHistoryRecord.TIMESTAMP_FMT.format(date)}\trun_name\t${uniqueId}\tlid://123456".toString() + lidLog.write("run_name", uniqueId, "lid://123456", 
date) + when: + new LinCommandImpl().log(configMap) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 2 + stdout[1] == recordEntry + } + + def 'should print no history' (){ + given: + def historyFile = storeLocation.resolve(".meta/.history") + Files.createDirectories(historyFile.parent) + + when: + new LinCommandImpl().log(configMap) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('WARN') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0] == "No workflow runs found in lineage history log" + } + + def 'should show lid content' (){ + given: + def lidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) + def encoder = new LinEncoder().withPrettyPrint(true) + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam","lid://123987/", null, 1234, time, time, null) + def jsonSer = encoder.encode(entry) + def expectedOutput = jsonSer + lidFile.text = jsonSer + when: + new LinCommandImpl().describe(configMap, ["lid://12345"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == expectedOutput.readLines().size() + stdout.join('\n') == expectedOutput + } + + def 'should warn if no lid content' (){ + given: + + when: + new LinCommandImpl().describe(configMap, ["lid://12345"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 1 + stdout[0] == "Error loading lid://12345 - Lineage object 12345 not found" + } + + def 'should get lineage lid content' (){ + given: + + def outputHtml = tmpDir.resolve('lineage.html') + + def lidFile = storeLocation.resolve(".meta/12345/file.bam/.data.json") + def lidFile2 = storeLocation.resolve(".meta/123987/file.bam/.data.json") + def lidFile3 = storeLocation.resolve(".meta/123987/.data.json") + def lidFile4 = storeLocation.resolve(".meta/45678/output.txt/.data.json") + def lidFile5 = storeLocation.resolve(".meta/45678/.data.json") + Files.createDirectories(lidFile.parent) + Files.createDirectories(lidFile2.parent) + Files.createDirectories(lidFile3.parent) + Files.createDirectories(lidFile4.parent) + Files.createDirectories(lidFile5.parent) + def encoder = new LinEncoder() + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam", "lid://45678", null, 1234, time, time, null) + lidFile.text = encoder.encode(entry) + entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987", "lid://45678", "lid://123987", 1234, time, time, null) + lidFile2.text = encoder.encode(entry) + entry = new TaskRun("u345-2346-1stw2", "foo", + new Checksum("abcde2345","nextflow","standard"), + 'this is a script', + [new Parameter( "val", "sample_id","ggal_gut"), + new 
Parameter("path","reads", ["lid://45678/output.txt"] ), + new Parameter("path","input", [new DataPath("path/to/file",new Checksum("45372qe","nextflow","standard"))]) + ], + null, null, null, null, [:],[], null) + lidFile3.text = encoder.encode(entry) + entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://45678", "lid://45678", null, 1234, time, time, null) + lidFile4.text = encoder.encode(entry) + entry = new TaskRun("u345-2346-1stw2", "bar", + new Checksum("abfs2556","nextflow","standard"), + 'this is a script', + null,null, null, null, null, [:],[], null) + lidFile5.text = encoder.encode(entry) + final network = """flowchart BT + lid://12345/file.bam@{shape: document, label: "lid://12345/file.bam"} + lid://123987/file.bam@{shape: document, label: "lid://123987/file.bam"} + lid://123987@{shape: process, label: "foo [lid://123987]"} + ggal_gut@{shape: document, label: "ggal_gut"} + path/to/file@{shape: document, label: "path/to/file"} + lid://45678/output.txt@{shape: document, label: "lid://45678/output.txt"} + lid://45678@{shape: process, label: "bar [lid://45678]"} + + lid://123987/file.bam -->lid://12345/file.bam + lid://123987 -->lid://123987/file.bam + ggal_gut -->lid://123987 + lid://45678/output.txt -->lid://123987 + path/to/file -->lid://123987 + lid://45678 -->lid://45678/output.txt +""" + final template = MermaidHtmlRenderer.readTemplate() + def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) + + when: + new LinCommandImpl().render(configMap, ["lid://12345/file.bam", outputHtml.toString()]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 1 + stdout[0] == "Linage graph for lid://12345/file.bam rendered in ${outputHtml}" + outputHtml.exists() + outputHtml.text == expectedOutput + } + + def 'should get lineage from workflow lid content' (){ + given: + + def outputHtml = tmpDir.resolve('lineage.html') + + def lidFile = storeLocation.resolve(".meta/12345/file.bam/.data.json") + def lidFile3 = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + Files.createDirectories(lidFile3.parent) + def encoder = new LinEncoder() + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://12345", "lid://12345", null, 1234, time, time, null) + lidFile.text = encoder.encode(entry) + def wf = new Workflow([new DataPath("/path/to/main.nf)")], "hello-nf", "aasdklk") + entry = new WorkflowRun(wf,"sessionId","run_name", + [new Parameter( "String", "sample_id","ggal_gut"), + new Parameter("Integer","reads",2)]) + lidFile3.text = encoder.encode(entry) + final network = """flowchart BT + lid://12345/file.bam@{shape: document, label: "lid://12345/file.bam"} + lid://12345@{shape: processes, label: "run_name [lid://12345]"} + ggal_gut@{shape: document, label: "ggal_gut"} + 2.0@{shape: document, label: "2.0"} + + lid://12345 -->lid://12345/file.bam + ggal_gut -->lid://12345 + 2.0 -->lid://12345 +""" + final template = MermaidHtmlRenderer.readTemplate() + def expectedOutput = template.replace('REPLACE_WITH_NETWORK_DATA', network) + + when: + new LinCommandImpl().render(configMap, ["lid://12345/file.bam", outputHtml.toString()]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == 1 + stdout[0] == "Linage graph for lid://12345/file.bam rendered in ${outputHtml}" + outputHtml.exists() + outputHtml.text == expectedOutput + } + + def 'should show query results'(){ + given: + def lidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def encoder = new LinEncoder().withPrettyPrint(true) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) + def jsonSer = encoder.encode(entry) + def expectedOutput = jsonSer + lidFile.text = jsonSer + when: + new LinCommandImpl().describe(configMap, ["lid:///?type=FileOutput"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.size() == expectedOutput.readLines().size() + stdout.join('\n') == expectedOutput + } + + def 'should show query with fragment'(){ + given: + def lidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def lidFile2 = storeLocation.resolve(".meta/67890/.data.json") + Files.createDirectories(lidFile2.parent) + def encoder = new LinEncoder().withPrettyPrint(true) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) + def entry2 = new FileOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), + "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) + def expectedOutput1 = '[\n "path/to/file",\n "path/to/file2"\n]' + def expectedOutput2 = '[\n "path/to/file2",\n "path/to/file"\n]' + lidFile.text = encoder.encode(entry) + lidFile2.text = encoder.encode(entry2) + when: + new LinCommandImpl().describe(configMap, ["lid:///?type=FileOutput#path"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.join('\n') == expectedOutput1 || stdout.join('\n') == expectedOutput2 + } + + def 'should diff'(){ + given: + def lidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def lidFile2 = storeLocation.resolve(".meta/67890/.data.json") + Files.createDirectories(lidFile2.parent) + def encoder = new LinEncoder().withPrettyPrint(true) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) + def entry2 = new FileOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), + "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) + lidFile.text = encoder.encode(entry) + lidFile2.text = encoder.encode(entry2) + def expectedOutput = '''diff --git 12345 67890 +--- 12345 ++++ 67890 +@@ -1,15 +1,15 @@ + { + "type": "FileOutput", +- "path": "path/to/file", ++ "path": "path/to/file2", + "checksum": { +- "value": "45372qe", ++ "value": "42472qet", + "algorithm": "nextflow", + "mode": "standard" + }, +- "source": "lid://123987/file.bam", ++ "source": "lid://123987/file2.bam", + "workflowRun": "lid://123987/", + "taskRun": null, +- "size": 1234, ++ "size": 1235, + "createdAt": "1970-01-02T10:17:36.789Z", + "modifiedAt": "1970-01-02T10:17:36.789Z", + "annotations": null +''' + + when: + new LinCommandImpl().diff(configMap, ["lid://12345", "lid://67890"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.join('\n') == expectedOutput + } + + def 'should print error if no entry found diff'(){ + given: + def lidFile = storeLocation.resolve(".meta/12345/.data.json") + Files.createDirectories(lidFile.parent) + def encoder = new LinEncoder().withPrettyPrint(true) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) + lidFile.text = encoder.encode(entry) + + when: + new LinCommandImpl().diff(configMap, ["lid://89012", "lid://12345"]) + new LinCommandImpl().diff(configMap, ["lid://12345", "lid://67890"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? line : null } + + then: + stdout.size() == 2 + stdout[0] == "No entry found for lid://89012." + stdout[1] == "No entry found for lid://67890." + } + + def 'should print error store is not found in diff'(){ + when: + def config = new ConfigMap() + new LinCommandImpl().log(config) + new LinCommandImpl().describe(config, ["lid:///?type=FileOutput"]) + new LinCommandImpl().render(config, ["lid://12345", "output.html"]) + new LinCommandImpl().diff(config, ["lid://89012", "lid://12345"]) + + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + def expectedOutput = "Error lineage store not loaded - Check Nextflow configuration" + then: + stdout.size() == 4 + stdout[0] == expectedOutput + stdout[1] == expectedOutput + stdout[2] == expectedOutput + stdout[3] == expectedOutput + } + + def 'should find metadata descriptions'(){ + given: + def lidFile = storeLocation.resolve(".meta/123987/file.bam/.data.json") + Files.createDirectories(lidFile.parent) + def lidFile2 = storeLocation.resolve(".meta/123987/file2.bam/.data.json") + Files.createDirectories(lidFile2.parent) + def encoder = new LinEncoder().withPrettyPrint(true) + def time = OffsetDateTime.ofInstant(Instant.ofEpochMilli(123456789), ZoneOffset.UTC) + def entry = new FileOutput("path/to/file",new Checksum("45372qe","nextflow","standard"), + "lid://123987/file.bam", "lid://123987/", null, 1234, time, time, null) + def entry2 = new FileOutput("path/to/file2",new Checksum("42472qet","nextflow","standard"), + "lid://123987/file2.bam", "lid://123987/", null, 1235, time, time, null) + def expectedOutput1 = '[\n "lid://123987/file.bam",\n "lid://123987/file2.bam"\n]' + def expectedOutput2 = '[\n "lid://123987/file2.bam",\n "lid://123987/file.bam"\n]' + lidFile.text = encoder.encode(entry) + lidFile2.text = encoder.encode(entry2) + when: + new LinCommandImpl().find(configMap, ["type=FileOutput"]) + def stdout = capture + .toString() + .readLines()// remove the log part + .findResults { line -> !line.contains('DEBUG') ? line : null } + .findResults { line -> !line.contains('INFO') ? line : null } + .findResults { line -> !line.contains('plugin') ? 
line : null } + + then: + stdout.join('\n') == expectedOutput1 || stdout.join('\n') == expectedOutput2 + } + + + +} diff --git a/modules/nf-lineage/src/test/nextflow/lineage/config/LineageConfigTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/config/LineageConfigTest.groovy new file mode 100644 index 0000000000..648b5d20a2 --- /dev/null +++ b/modules/nf-lineage/src/test/nextflow/lineage/config/LineageConfigTest.groovy @@ -0,0 +1,50 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package nextflow.lineage.config
+
+
+import spock.lang.Specification
+/**
+ * Checks the values exposed by {@link LineageConfig} when it is built from
+ * an empty map, from a map that only enables lineage, and from a map that
+ * also provides a store location.
+ *
+ * @author Paolo Di Tommaso
+ */
+class LineageConfigTest extends Specification {
+
+    // empty options: `enabled` is falsy and no store location is reported
+    def 'should create default config' () {
+        given:
+        def lineage = new LineageConfig(Map.of())
+
+        expect:
+        !lineage.enabled
+        !lineage.store.location
+    }
+
+    // enabling lineage on its own does not populate the store location
+    def 'should create default with enable' () {
+        given:
+        def opts = [enabled: true]
+        def lineage = new LineageConfig(opts)
+
+        expect:
+        lineage.enabled
+        !lineage.store.location
+    }
+
+    // an explicit store location is returned exactly as provided
+    def 'should create data config with location' () {
+        given:
+        def opts = [enabled: true, store: [location: "/some/data/store"]]
+        def lineage = new LineageConfig(opts)
+
+        expect:
+        lineage.enabled
+        lineage.store.location == '/some/data/store'
+    }
+}
diff --git a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy
new file mode 100644
index 0000000000..0a47291897
--- /dev/null
+++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinFileSystemProviderTest.groovy
@@ -0,0 +1,492 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage.fs
+
+import nextflow.lineage.DefaultLinStore
+import spock.lang.Shared
+
+import java.nio.ByteBuffer
+import java.nio.channels.NonWritableChannelException
+import java.nio.file.AccessDeniedException
+import java.nio.file.AccessMode
+import java.nio.file.FileSystemNotFoundException
+import java.nio.file.Files
+import java.nio.file.Path
+import java.nio.file.ProviderMismatchException
+import java.nio.file.StandardOpenOption
+import java.nio.file.attribute.BasicFileAttributes
+
+import nextflow.Global
+import nextflow.Session
+import spock.lang.Specification
+
+/**
+ * LID File system provider tests.
+ *
+ * All features share one temporary directory ({@code wdir}); lineage metadata
+ * is written under {@code wdir/.meta} and the referenced data files under
+ * {@code wdir/work}. Each feature removes the entries it creates so that the
+ * shared store does not leak state into the following features.
+ *
+ * @author Jorge Ejarque
+ */
+class LinFileSystemProviderTest extends Specification {
+
+    @Shared def wdir = Files.createTempDirectory('wdir')
+    @Shared def meta = wdir.resolve('.meta')
+    @Shared def data = wdir.resolve('work')
+
+    def setupSpec(){
+        meta.mkdirs()
+        data.mkdirs()
+    }
+
+    def cleanupSpec(){
+        wdir.deleteDir()
+    }
+
+    def cleanup() {
+        // most features install a mocked session; drop it so it cannot leak into other specs
+        Global.session = null
+    }
+
+    def 'should return lid scheme' () {
+        given:
+        def provider = new LinFileSystemProvider()
+        expect:
+        provider.getScheme() == 'lid'
+    }
+
+    // a LinPath is returned as-is, any other Path implementation is rejected
+    def 'should get lid path' () {
+        given:
+        def lid = Mock(LinPath)
+        and:
+        def provider = new LinFileSystemProvider()
+        expect:
+        provider.toLinPath(lid) == lid
+
+        when:
+        provider.toLinPath(Path.of('foo'))
+        then:
+        thrown(ProviderMismatchException)
+    }
+
+    def 'should create new file system' () {
+        given:
+        def provider = new LinFileSystemProvider()
+        def config = [store:[location:data.toString()]]
+        def lid = LinPath.asUri('lid://12345')
+        when:
+        def fs = provider.newFileSystem(lid, config) as LinFileSystem
+        then:
+        (fs.store as DefaultLinStore).location == data
+    }
+
+    // getFileSystem fails until newFileSystem has been called on the same provider
+    def 'should get a file system' () {
+        given:
+        def provider = new LinFileSystemProvider()
+        def config = [store:[location: data.toString()]]
+        def uri = LinPath.asUri('lid://12345')
+        when:
+        provider.getFileSystem(uri)
+        then:
+        thrown(FileSystemNotFoundException)
+
+        when:
+        provider.newFileSystem(uri, config) as LinFileSystem
+        and:
+        def fs = provider.getFileSystem(uri) as LinFileSystem
+        then:
+        (fs.store as DefaultLinStore).location == data
+    }
+
+    // the second call must hand back the very same file system instance
+    def 'should get or create a file system' () {
+        given:
+        def config = [lineage:[store:[location: data.toString()]]]
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def uri = LinPath.asUri('lid://12345')
+        def provider = new LinFileSystemProvider()
+
+        when:
+        def fs = provider.getFileSystemOrCreate(uri) as LinFileSystem
+        then:
+        (fs.store as DefaultLinStore).location == data
+
+        when:
+        def fs2 = provider.getFileSystemOrCreate(uri) as LinFileSystem
+        then:
+        fs2.is(fs)
+    }
+
+    // read-only channel over the data file referenced by a FileOutput record
+    def 'should create new byte channel' () {
+        given:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        def outputMeta = meta.resolve("12345/output.txt")
+        def output = data.resolve("output.txt")
+        output.text = "Hello, World!"
+        outputMeta.mkdirs()
+        outputMeta.resolve(".data.json").text = '{"type":"FileOutput","path":"'+output.toString()+'"}'
+
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid = provider.getPath(LinPath.asUri('lid://12345/output.txt'))
+        def opts = Set.of(StandardOpenOption.READ)
+        when:
+        def channel = provider.newByteChannel(lid, opts)
+        then:
+        channel.isOpen()
+        channel.position() == 0
+        channel.size() == "Hello, World!".getBytes().size()
+        when:
+        channel.truncate(25)
+        then:
+        thrown(NonWritableChannelException)
+
+        when:
+        def buffer = ByteBuffer.allocate(1000)
+        def read = channel.read(buffer)
+        def bytes = new byte[read]
+        buffer.get(0,bytes)
+        then:
+        bytes == "Hello, World!".getBytes()
+        when:
+        channel.position(2)
+        then:
+        channel.position() == 2
+
+        when:
+        channel.write(buffer)
+        then:
+        thrown(NonWritableChannelException)
+
+        when:
+        provider.newByteChannel(lid, Set.of(StandardOpenOption.WRITE))
+        then:
+        thrown(UnsupportedOperationException)
+
+        when:
+        provider.newByteChannel(lid, Set.of(StandardOpenOption.APPEND))
+        then:
+        thrown(UnsupportedOperationException)
+
+        cleanup:
+        // null-safe: if the channel could not be opened, report that failure rather than an NPE here
+        channel?.close()
+        outputMeta.deleteDir()
+        output.delete()
+    }
+
+    // read-only channel over a metadata fragment (`#name`) of a WorkflowRun record
+    def 'should create new byte channel for LinMetadata' () {
+        given:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        def outputMeta = meta.resolve("12345")
+        outputMeta.mkdirs()
+        outputMeta.resolve(".data.json").text = '{"type":"WorkflowRun","sessionId":"session","name":"run_name","params":[{"type":"String","name":"param1","value":"value1"}]}'
+
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid = provider.getPath(LinPath.asUri('lid://12345#name'))
+
+        when:
+        def channel = provider.newByteChannel(lid, Set.of(StandardOpenOption.READ))
+        then:
+        channel.isOpen()
+        channel.position() == 0
+        channel.size() == '"run_name"'.getBytes().size()
+
+        when:
+        channel.truncate(25)
+        then:
+        thrown(NonWritableChannelException)
+
+        when:
+        def buffer = ByteBuffer.allocate(1000)
+        def read = channel.read(buffer)
+        def bytes = new byte[read]
+        buffer.get(0,bytes)
+        then:
+        bytes =='"run_name"'.getBytes()
+
+        when:
+        channel.position(2)
+        then:
+        channel.position() == 2
+
+        when:
+        channel.write(buffer)
+        then:
+        thrown(NonWritableChannelException)
+
+        when:
+        provider.newByteChannel(lid, Set.of(StandardOpenOption.WRITE))
+        then:
+        thrown(UnsupportedOperationException)
+
+        when:
+        provider.newByteChannel(lid, Set.of(StandardOpenOption.APPEND))
+        then:
+        thrown(UnsupportedOperationException)
+
+        cleanup:
+        // null-safe: if the channel could not be opened, report that failure rather than an NPE here
+        channel?.close()
+        outputMeta.deleteDir()
+    }
+
+    // reading a lid path yields the content of the data file its FileOutput record points to
+    def 'should read lid' () {
+        given:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        def outputMeta = meta.resolve("12345/output.txt")
+        def output = data.resolve("output.txt")
+        output.text = "Hello, World!"
+        outputMeta.mkdirs()
+        outputMeta.resolve(".data.json").text = '{"type":"FileOutput","path":"'+output.toString()+'"}'
+
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid = provider.getPath(LinPath.asUri('lid://12345/output.txt'))
+
+        expect:
+        lid.text == "Hello, World!"
+
+        cleanup:
+        outputMeta.deleteDir()
+        output.delete()
+    }
+
+    def 'should not create a directory' () {
+        given:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid = provider.getPath(LinPath.asUri('lid://12345'))
+
+        when:
+        provider.createDirectory(lid)
+        then:
+        thrown(UnsupportedOperationException)
+
+    }
+
+    // a FileOutput pointing to a directory can be listed; a TaskRun entry cannot
+    def 'should create directory stream' () {
+        given:
+        def output1 = data.resolve('path')
+        output1.mkdir()
+        output1.resolve('file1.txt').text = 'file1'
+        output1.resolve('file2.txt').text = 'file2'
+        output1.resolve('file3.txt').text = 'file3'
+        meta.resolve('12345/output1').mkdirs()
+        meta.resolve('12345/output2').mkdirs()
+        meta.resolve('12345/.data.json').text = '{"type":"TaskRun"}'
+        meta.resolve('12345/output1/.data.json').text = '{"type":"FileOutput", "path": "' + output1.toString() + '"}'
+
+        and:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid = provider.getPath(LinPath.asUri('lid://12345/output1'))
+        def lid2 = provider.getPath(LinPath.asUri('lid://12345'))
+
+        expect:
+        Files.exists(lid)
+        Files.exists(lid.resolve('file1.txt'))
+        Files.exists(lid.resolve('file2.txt'))
+        Files.exists(lid.resolve('file3.txt'))
+
+        when:
+        provider.newDirectoryStream(lid2, (p) -> true)
+        then:
+        thrown(FileNotFoundException)
+
+        when:
+        def stream = provider.newDirectoryStream(lid, (p) -> true)
+        and:
+        def result = stream.toList()
+        then:
+        result.toSet() == [
+            lid.resolve('file1.txt'),
+            lid.resolve('file2.txt'),
+            lid.resolve('file3.txt')
+        ] as Set
+
+        cleanup:
+        meta.resolve('12345').deleteDir()
+        output1.deleteDir()
+
+    }
+
+    def 'should not delete a file' () {
+        given:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid = provider.getPath(LinPath.asUri('lid://12345'))
+
+        when:
+        provider.delete(lid)
+        then:
+        thrown(UnsupportedOperationException)
+
+    }
+
+    def 'should not copy a file' () {
+        given:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc'))
+        def lid2 = provider.getPath(LinPath.asUri('lid://54321/foo'))
+
+        when:
+        provider.copy(lid1, lid2)
+        then:
+        thrown(UnsupportedOperationException)
+    }
+
+    def 'should not move a file' () {
+        given:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc'))
+        def lid2 = provider.getPath(LinPath.asUri('lid://54321/foo'))
+
+        when:
+        provider.move(lid1, lid2)
+        then:
+        thrown(UnsupportedOperationException)
+    }
+
+    // two paths built from the same lid URI are the same file, different URIs are not
+    def 'should check is same file' () {
+        given:
+        def folder = Files.createTempDirectory('test')
+        def config = [lineage:[store:[location:folder.toString()]]]
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc'))
+        def lid2 = provider.getPath(LinPath.asUri('lid://54321/foo'))
+        def lid3 = provider.getPath(LinPath.asUri('lid://54321/foo'))
+
+        expect:
+        !provider.isSameFile(lid1, lid2)
+        !provider.isSameFile(lid1, lid3)
+        and:
+        provider.isSameFile(lid2, lid3)
+
+        cleanup:
+        folder?.deleteDir()
+    }
+
+    // only the dot-prefixed entry of the published directory is reported as hidden
+    def 'should check is hidden file' () {
+        given:
+        def folder = Files.createTempDirectory('test')
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def output = folder.resolve('path')
+        output.mkdir()
+        output.resolve('abc').text = 'file1'
+        output.resolve('.foo').text = 'file2'
+        meta.resolve('12345/output').mkdirs()
+        meta.resolve('12345/output/.data.json').text = '{"type":"FileOutput", "path": "' + output.toString() + '"}'
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid1 = provider.getPath(LinPath.asUri('lid://12345/output/abc'))
+        def lid2 = provider.getPath(LinPath.asUri('lid://12345/output/.foo'))
+
+        expect:
+        !provider.isHidden(lid1)
+        provider.isHidden(lid2)
+
+        cleanup:
+        folder?.deleteDir()
+        // the metadata lives in the shared store, remove it like the sibling features do
+        meta.resolve('12345').deleteDir()
+    }
+
+    // attributes of a lid path mirror those of the real file behind it
+    def 'should read file attributes' () {
+        given:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        def file = data.resolve('abc')
+        file.text = 'Hello'
+        meta.resolve('12345/abc').mkdirs()
+        meta.resolve('12345/abc/.data.json').text = '{"type":"FileOutput", "path": "' + file.toString() + '"}'
+        Global.session = Mock(Session) { getConfig()>>config }
+        and:
+        def provider = new LinFileSystemProvider()
+        def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc'))
+
+        when:
+        def attr1 = provider.readAttributes(lid1, BasicFileAttributes)
+        def real1 = Files.readAttributes(file, BasicFileAttributes)
+        then:
+        !attr1.directory
+        attr1.isRegularFile()
+        attr1.size() == real1.size()
+        attr1.creationTime() == real1.creationTime()
+        attr1.lastModifiedTime() == real1.lastModifiedTime()
+        attr1.lastAccessTime() == real1.lastAccessTime()
+
+        cleanup:
+        file?.delete()
+        meta.resolve('12345').deleteDir()
+    }
+
+    def 'should throw exception in unsupported methods'() {
+        given:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        Global.session = Mock(Session) { getConfig()>>config }
+        def provider = new LinFileSystemProvider()
+
+        when:
+        provider.newOutputStream(null)
+        then:
+        thrown(UnsupportedOperationException)
+
+        when:
+        provider.getFileStore(null)
+        then:
+        thrown(UnsupportedOperationException)
+
+        when:
+        provider.readAttributes(null, "attrib")
+        then:
+        thrown(UnsupportedOperationException)
+
+        when:
+        provider.setAttribute(null, "attrib", null)
+        then:
+        thrown(UnsupportedOperationException)
+    }
+
+    // WRITE and EXECUTE access checks are rejected with a dedicated message each
+    def 'should throw exception when checking access mode'(){
+        given:
+        def config = [lineage:[store:[location:wdir.toString()]]]
+        Global.session = Mock(Session) { getConfig()>>config }
+        def provider = new LinFileSystemProvider()
+        def lid1 = provider.getPath(LinPath.asUri('lid://12345/abc'))
+
+        when:
+        provider.checkAccess(lid1, AccessMode.WRITE)
+        then:
+        def ex1 = thrown(AccessDeniedException)
+        ex1.message == "Write mode not supported"
+
+        when:
+        provider.checkAccess(lid1, AccessMode.EXECUTE)
+        then:
+        def ex2 = thrown(AccessDeniedException)
+        ex2.message == "Execute mode not supported"
+    }
+}
+
diff --git a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathFactoryTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathFactoryTest.groovy
new file mode 100644
index 0000000000..65b6318dac
--- /dev/null
+++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathFactoryTest.groovy
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage.fs
+
+import java.nio.file.Files
+import java.nio.file.Path
+
+import nextflow.Global
+import nextflow.Session
+import spock.lang.Specification
+import spock.lang.Unroll
+
+/**
+ * LID Path Factory tests.
+ * Every feature runs against a mocked global session whose lineage store location is a private temp directory.
+ * @author Jorge Ejarque
+ */
+class LinPathFactoryTest extends Specification {
+
+    Path tmp    // per-feature temp directory, created in setup() and removed in cleanup()
+
+    def setup() {
+        tmp = Files.createTempDirectory("data")
+        Global.session = Mock(Session) { getConfig()>> [lineage:[store:[location: tmp.toString()]]] }    // top-level `lineage` scope, as in the other lineage file-system specs
+    }
+
+    def cleanup() {
+        Global.session = null    // do not leak the mocked session into other specs
+        tmp.deleteDir()
+    }
+
+    def 'should create lin path' () {    // parseUri: null for a non-lid string, repeated slashes collapsed, 'lid:///12345' rejected
+        given:
+        def factory = new LinPathFactory()
+
+        expect:
+        factory.parseUri('foo') == null
+
+        when:
+        def p1 = factory.parseUri('lid://12345')
+        then:
+        p1.toUriString() == 'lid://12345'
+
+        when:
+        def p2 = factory.parseUri('lid://12345/x/y/z')
+        then:
+        p2.toUriString() == 'lid://12345/x/y/z'
+
+        when:
+        def p3 = factory.parseUri('lid://12345//x///y/z//')
+        then:
+        p3.toUriString() == 'lid://12345/x/y/z'
+
+        when:
+        factory.parseUri('lid:///12345')
+        then:
+        thrown(IllegalArgumentException)
+    }
+
+    @Unroll
+    def 'should convert get lid uri string' () {    // create() followed by toUriString() must give back the same URI string
+        given:
+        def factory = new LinPathFactory()
+
+        when:
+        def lid = LinPathFactory.create(EXPECTED)
+        then:
+        factory.toUriString(lid) == EXPECTED
+
+        where:
+        _ | EXPECTED
+        _ | 'lid://123'
+        _ | 'lid://123/a/b/c'
+    }
+}
diff --git a/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy
new file mode 100644
index 0000000000..92b48e16cc
--- /dev/null
+++ b/modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy
@@ -0,0 +1,676 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage.fs
+
+import nextflow.lineage.LinUtils
+import nextflow.lineage.model.Checksum
+import nextflow.lineage.model.Parameter
+import nextflow.lineage.model.Workflow
+import nextflow.lineage.model.WorkflowOutput
+import nextflow.lineage.model.FileOutput
+import nextflow.lineage.model.WorkflowRun
+import nextflow.lineage.serde.LinEncoder
+import nextflow.util.CacheHelper
+import org.junit.Rule
+import test.OutputCapture
+
+import java.nio.file.Files
+import java.nio.file.Path
+import java.nio.file.ProviderMismatchException
+
+import spock.lang.Shared
+import spock.lang.Specification
+import spock.lang.Unroll
+
+import java.time.OffsetDateTime
+
+/**
+ * LID Path Tests: construction, java.nio Path operations, target resolution and checksum validation of LinPath
+ * @author Jorge Ejarque
+ */
+class LinPathTest extends Specification {
+
+    @Shared
+    Path wdir    // temp root shared by all features; deleted in cleanupSpec()
+    @Shared
+    Path meta    // wdir/.meta, where features write the '.data.json' lineage records
+    @Shared
+    Path data    // wdir/work, where features write the files the records point to
+    @Shared
+    def fs = Mock(LinFileSystem)    // a non-null file system; paths built with it are treated as absolute (see 'should validate isAbsolute')
+
+    def setupSpec(){
+        wdir = Files.createTempDirectory("wdir")
+        meta = wdir.resolve('.meta')
+        data = wdir.resolve('work')
+    }
+
+    def cleanupSpec(){
+        wdir.deleteDir()
+    }
+
+    @Rule
+    OutputCapture capture = new OutputCapture()    // captured output is inspected by the checksum features
+
+    def 'should create from URI' () {    // file path, query and fragment are split out of the URI
+        when:
+        def path = new LinPath(fs, new URI( URI_STRING ))
+        then:
+        path.filePath == PATH
+        path.fragment == FRAGMENT
+        path.query == QUERY
+
+        where:
+        URI_STRING | PATH | QUERY | FRAGMENT
+        "lid://1234/hola" | "1234/hola" | null | null
+        "lid://1234/hola#frag.sub" | "1234/hola" | null | "frag.sub"
+        "lid://1234/#frag.sub" | "1234" | null | "frag.sub"
+        "lid://1234/?q=a&b=c" | "1234" | "q=a&b=c" | null
+        "lid://1234/?q=a&b=c#frag.sub" | "1234" | "q=a&b=c" | "frag.sub"
+        "lid:///" | "/" | null | null
+    }
+
+    def 'should create correct lid Path' () {    // same inputs with and without a file system: only the fs-bound form drops leading '..'
+        when:
+        def lid = new LinPath(FS, PATH, MORE)
+        then:
+        lid.filePath == EXPECTED_FILE
+        where:
+        FS | PATH | MORE | EXPECTED_FILE
+        fs | '/' | [] as String[] | '/'
+        null | '/' | [] as String[] | '/'
+        fs | '/' | ['a','b'] as String[] | 'a/b'
+        null | '/' | ['a','b'] as String[] | 'a/b'
+        fs | '' | [] as String[] | '/'
+        null | '' | [] as String[] | '/'
+        fs | '' | ['a','b'] as String[] | 'a/b'
+        null | '' | ['a','b'] as String[] | 'a/b'
+        fs | '1234' | [] as String[] | '1234'
+        null | '1234' | [] as String[] | '1234'
+        fs | '1234' | ['a','b'] as String[] | '1234/a/b'
+        null | '1234' | ['a','b'] as String[] | '1234/a/b'
+        fs | '1234/c' | [] as String[] | '1234/c'
+        null | '1234/c' | [] as String[] | '1234/c'
+        fs | '1234/c' | ['a','b'] as String[] | '1234/c/a/b'
+        null | '1234/c' | ['a','b'] as String[] | '1234/c/a/b'
+        fs | '/1234/c' | [] as String[] | '1234/c'
+        null | '/1234/c' | [] as String[] | '1234/c'
+        fs | '/1234/c' | ['a','b'] as String[] | '1234/c/a/b'
+        null | '/1234/c' | ['a','b'] as String[] | '1234/c/a/b'
+        fs | '../c' | ['a','b'] as String[] | 'c/a/b'
+        null | '../c' | ['a','b'] as String[] | '../c/a/b'
+        fs | '../c' | [] as String[] | 'c'
+        null | '../c' | [] as String[] | '../c'
+        fs | '..' | [] as String[] | '/'
+        null | '..' | [] as String[] | '..'
+        fs | '/..' | [] as String[] | '/'
+        null | '/..' | [] as String[] | '/'
+        fs | './1234/c' | ['a','b'] as String[] | '1234/c/a/b'
+        null | './1234/c' | ['a','b'] as String[] | '1234/c/a/b'
+        fs | './1234/c' | [] as String[] | '1234/c'
+        null | './1234/c' | [] as String[] | '1234/c'
+        fs | '1234' | ['/'] as String[] | '1234'
+        null | '1234' | ['/'] as String[] | '1234'
+        null | '../../a/b' | [] as String[] | '../../a/b'
+        fs | '1234/' | [] as String[] | '1234'
+        null | '1234/' | [] as String[] | '1234'
+    }
+
+    def 'should get target path' () {    // resolves lid paths to real files through FileOutput records written under meta
+        given:
+        def outputFolder = data.resolve('output')
+        def outputSubFolder = outputFolder.resolve('some/path')
+        outputSubFolder.mkdirs()
+        def outputSubFolderFile = outputSubFolder.resolve('file1.txt')
+        outputSubFolderFile.text = "this is file1"
+        def outputFile = data.resolve('file2.txt')
+        outputFile.text = "this is file2"
+
+        def lidFs = new LinFileSystemProvider().newFileSystem(new URI("lid:///"), [enabled: true, store: [location: wdir.toString()]])
+
+        meta.resolve('12345/output1').mkdirs()
+        meta.resolve('12345/path/to/file2.txt').mkdirs()
+        meta.resolve('12345/.data.json').text = '{"type":"TaskRun"}'
+        meta.resolve('12345/output1/.data.json').text = '{"type":"FileOutput", "path": "' + outputFolder.toString() + '"}'
+        meta.resolve('12345/path/to/file2.txt/.data.json').text = '{"type":"FileOutput", "path": "' + outputFile.toString() + '"}'
+        def time = OffsetDateTime.now()
+        def wfResultsMetadata = new LinEncoder().withPrettyPrint(true).encode(new WorkflowOutput(time, "lid://1234", [new Parameter( "Path", "a", "lid://1234/a.txt")]))
+        meta.resolve('5678/').mkdirs()
+        meta.resolve('5678/.data.json').text = wfResultsMetadata
+
+        expect: 'Get real path when LinPath is the output data or a subfolder'
+        new LinPath(lidFs, '12345/output1').getTargetPath() == outputFolder
+        new LinPath(lidFs,'12345/output1/some/path').getTargetPath() == outputSubFolder
+        new LinPath(lidFs,'12345/output1/some/path/file1.txt').getTargetPath().text == outputSubFolderFile.text
+        new LinPath(lidFs, '12345/path/to/file2.txt').getTargetPath().text == outputFile.text
+
+        when: 'LinPath fs is null'
+        new LinPath(null, '12345').getTargetPath()
+        then:
+        thrown(IllegalArgumentException)
+
+        when: 'LinPath is empty'
+        new LinPath(lidFs, '/').getTargetPath()
+        then:
+        thrown(IllegalArgumentException)
+
+        when: 'LinPath is not an output data description'
+        new LinPath(lidFs, '12345').getTargetPath()
+        then:
+        thrown(FileNotFoundException)
+
+        when: 'LinPath is not subfolder of an output data description'
+        new LinPath(lidFs, '12345/path').getTargetPath()
+        then:
+        thrown(FileNotFoundException)
+
+        when: 'LinPath subfolder of an output data description does not exist'
+        new LinPath(lidFs, '12345/output1/other/path').getTargetPath()
+        then:
+        thrown(FileNotFoundException)
+
+        when: 'Lid does not exist'
+        new LinPath(lidFs, '23456').getTargetPath()
+        then:
+        thrown(FileNotFoundException)
+
+        when: 'Lid description'
+        def result = new LinPath(lidFs, '5678').getTargetOrMetadataPath()
+        then:
+        result instanceof LinMetadataPath
+        result.text == wfResultsMetadata
+
+        when: 'Lid description subobject'
+        def result2 = new LinPath(lidFs, '5678#output').getTargetOrMetadataPath()
+        then:
+        result2 instanceof LinMetadataPath
+        result2.text == LinUtils.encodeSearchOutputs([new Parameter("Path","a", "lid://1234/a.txt")], true)
+
+        when: 'Lid subobject does not exist'
+        new LinPath(lidFs, '23456#notexists').getTargetOrMetadataPath()
+        then:
+        thrown(IllegalArgumentException)
+    }
+
+    def 'should get subobjects as path' (){    // metadata fragments are exposed as LinMetadataPath instances
+        given:
+        def lidFs = new LinFileSystemProvider().newFileSystem(new URI("lid:///"), [enabled: true, store: [location: wdir.toString()]])
+        def wf = new WorkflowRun(new Workflow([],"repo", "commit"), "sessionId", "runId", [new Parameter("String", "param1", "value1")])
+
+        when: 'workflow repo in workflow run'
+        Path p = LinPath.getMetadataAsTargetPath(wf, lidFs, "123456", ["workflow", "repository"] as String[])
+        then:
+        p instanceof LinMetadataPath
+        p.text == '"repo"'
+
+        when: 'outputs'
+        def outputs = new WorkflowOutput(OffsetDateTime.now(), "lid://123456", [new Parameter("Collection", "samples", ["sample1", "sample2"])])
+        lidFs.store.save("123456/output", outputs)
+        Path p2 = LinPath.getMetadataAsTargetPath(wf, lidFs, "123456", ["output"] as String[])
+        then:
+        p2 instanceof LinMetadataPath
+        p2.text == LinUtils.encodeSearchOutputs([new Parameter("Collection", "samples", ["sample1", "sample2"])], true)
+
+        when: 'child does not exists'
+        LinPath.getMetadataAsTargetPath(wf, lidFs, "123456", ["no-exist"] as String[])
+        then:
+        def exception = thrown(FileNotFoundException)
+        exception.message == "Target path '123456#no-exist' does not exist"
+
+        when: 'outputs does not exists'
+        LinPath.getMetadataAsTargetPath(wf, lidFs, "6789", ["output"] as String[])
+        then:
+        def exception1 = thrown(FileNotFoundException)
+        exception1.message == "Target path '6789#output' does not exist"
+
+        when: 'null object'
+        LinPath.getMetadataAsTargetPath(null, lidFs, "123456", ["no-exist"] as String[])
+        then:
+        def exception2 = thrown(FileNotFoundException)
+        exception2.message == "Target path '123456' does not exist"
+
+        cleanup:
+        meta.resolve("123456").deleteDir()
+    }
+
+    def 'should get file name' () {    // the file name is a path without file system
+        when:
+        def lid1 = new LinPath(fs, '1234567890/this/file.bam')
+        then:
+        lid1.getFileName() == new LinPath(null, 'file.bam')
+    }
+
+    def 'should get file parent' () {    // walks up to the root; the parent of the root is null
+        when:
+        def lid1 = new LinPath(fs, '1234567890/this/file.bam')
+        then:
+        lid1.getParent() == new LinPath(fs, '1234567890/this')
+        lid1.getParent().getParent() == new LinPath(fs, '1234567890')
+        lid1.getParent().getParent().getParent() == new LinPath(fs, "/")
+        lid1.getParent().getParent().getParent().getParent() == null
+    }
+
+    @Unroll
+    def 'should get name count' () {
+        expect:
+        new LinPath(fs, PATH).getNameCount() == EXPECTED
+        where:
+        PATH | EXPECTED
+        '/' | 0
+        '123' | 1
+        '123/a' | 2
+        '123/a/' | 2
+        '123/a/b' | 3
+        '' | 0
+    }
+
+    @Unroll
+    def 'should get name by index' () {    // only element 0 keeps the file system
+        expect:
+        new LinPath(fs, PATH).getName(INDEX) == EXPECTED
+        where:
+        PATH | INDEX | EXPECTED
+        '123' | 0 | new LinPath(fs, '123')
+        '123/a' | 1 | new LinPath(null, 'a')
+        '123/a/' | 1 | new LinPath(null, 'a')
+        '123/a/b' | 2 | new LinPath(null, 'b')
+    }
+
+    @Unroll
+    def 'should get subpath' () {    // sub-paths starting at index 0 keep the file system, the others do not
+        expect:
+        new LinPath(fs, PATH).subpath(BEGIN,END) == EXPECTED
+        where:
+        PATH | BEGIN | END | EXPECTED
+        '123' | 0 | 1 | new LinPath(fs, '123')
+        '123/a' | 0 | 2 | new LinPath(fs, '123/a')
+        '123/a/' | 0 | 2 | new LinPath(fs, '123/a')
+        '123/a' | 1 | 2 | new LinPath(null, 'a')
+        '123/a/' | 1 | 2 | new LinPath(null, 'a')
+        '123/a/b' | 2 | 3 | new LinPath(null, 'b')
+        '123/a/b' | 1 | 3 | new LinPath(null, 'a/b')
+    }
+
+    def 'should normalize a path' () {
+        expect:
+        new LinPath(fs, '123').normalize() == new LinPath(fs, '123')
+        new LinPath(fs, '123/a/b').normalize() == new LinPath(fs, '123/a/b')
+        new LinPath(fs, '123/./a/b').normalize() == new LinPath(fs, '123/a/b')
+        new LinPath(fs, '123/a/../a/b').normalize() == new LinPath(fs, '123/a/b')
+    }
+
+    @Unroll
+    def 'should validate startWith' () {
+        expect:
+        new LinPath(fs,PATH).startsWith(OTHER) == EXPECTED
+        where:
+        PATH | OTHER | EXPECTED
+        '12345/a/b' | '12345' | true
+        '12345/a/b' | '12345/a' | true
+        '12345/a/b' | '12345/a/b' | true
+        and:
+        '12345/a/b' | '12345/b' | false
+        '12345/a/b' | 'xyz' | false
+    }
+
+    @Unroll
+    def 'should validate endsWith' () {
+        expect:
+        new LinPath(fs,PATH).endsWith(OTHER) == EXPECTED
+        where:
+        PATH | OTHER | EXPECTED
+        '12345/a/b' | 'b' | true
+        '12345/a/b' | 'a/b' | true
+        '12345/a/b' | '12345/a/b' | true
+        and:
+        '12345/a/b' | '12345/b' | false
+        '12345/a/b' | 'xyz' | false
+    }
+
+    def 'should validate isAbsolute' () {    // anything that still starts at element 0 is absolute
+        expect:
+        new LinPath(fs,'1234/a/b/c').isAbsolute()
+        new LinPath(fs,'1234/a/b/c').getRoot().isAbsolute()
+        new LinPath(fs,'1234/a/b/c').getParent().isAbsolute()
+        new LinPath(fs,'1234/a/b/c').normalize().isAbsolute()
+        new LinPath(fs,'1234/a/b/c').getName(0).isAbsolute()
+        new LinPath(fs,'1234/a/b/c').subpath(0,2).isAbsolute()
+        and:
+        !new LinPath(fs,'1234/a/b/c').getFileName().isAbsolute()
+        !new LinPath(fs,'1234/a/b/c').getName(1).isAbsolute()
+        !new LinPath(fs,'1234/a/b/c').subpath(1,3).isAbsolute()
+    }
+
+    @Unroll
+    def 'should get root path' () {
+        expect:
+        new LinPath(fs,PATH).getRoot() == new LinPath(fs,EXPECTED)
+        where:
+        PATH | EXPECTED
+        '12345' | '/'
+        '12345/a' | '/'
+    }
+
+    def 'should relativize path' () {    // the result is always a path without file system
+        expect:
+        BASE_PATH.relativize(PATH) == EXPECTED
+        where :
+        BASE_PATH | PATH | EXPECTED
+        new LinPath(fs, '/') | new LinPath(fs, '123/a/b/c') | new LinPath(null, '123/a/b/c')
+        new LinPath(fs,'123/a/') | new LinPath(fs, '123/a/b/c') | new LinPath(null, 'b/c')
+        new LinPath(fs,'123/a/') | new LinPath(fs, '321/a/') | new LinPath(null, '../../321/a')
+        new LinPath(null,'123/a') | new LinPath(null, '123/a/b/c') | new LinPath(null, 'b/c')
+        new LinPath(null,'123/a') | new LinPath(null, '321/a') | new LinPath(null, '../../321/a')
+        new LinPath(fs,'../a/') | new LinPath(fs, '321/a') | new LinPath(null, '../321/a')
+        new LinPath(fs,'321/a/') | new LinPath(fs, '../a') | new LinPath(null, '../../a')
+        new LinPath(null,'321/a/') | new LinPath(null, '../a') | new LinPath(null, '../../../a')
+    }
+
+    def 'relativize should throw exception' () {
+        given:
+        def lid1 = new LinPath(fs,'123/a/')
+        def lid2 = new LinPath(null,'123/a/')
+        def lid3 = new LinPath(null, '../a/b')
+        when: 'comparing relative with absolute'
+        lid1.relativize(lid2)
+        then:
+        thrown(IllegalArgumentException)
+
+        when: 'undefined base path'
+        lid3.relativize(lid2)
+        then:
+        thrown(IllegalArgumentException)
+    }
+
+    def 'should resolve path' () {    // resolving an fs-bound path returns it as is; fs-less paths are appended
+        when:
+        def lid1 = new LinPath(fs, '123/a/b/c')
+        def lid2 = new LinPath(fs, '321/x/y/z')
+        def rel1 = new LinPath(null, 'foo')
+        def rel2 = new LinPath(null, 'bar/')
+
+        then:
+        lid1.resolve(lid2) == lid2
+        lid2.resolve(lid1) == lid1
+        and:
+        lid1.resolve(rel1) == new LinPath(fs,'123/a/b/c/foo')
+        lid1.resolve(rel2) == new LinPath(fs,'123/a/b/c/bar')
+        and:
+        rel1.resolve(rel2) == new LinPath(null, 'foo/bar')
+        rel2.resolve(rel1) == new LinPath(null, 'bar/foo')
+    }
+
+    def 'should resolve path as string' () {    // a 'lid://' string is handed to the provider's getPath
+        given:
+        def pr = Mock(LinFileSystemProvider)
+        def lidfs = Mock(LinFileSystem){
+            provider() >> pr}
+
+
+        def lid1 = new LinPath(lidfs, '123/a/b/c')
+
+        expect:
+        lid1.resolve('x/y') == new LinPath(lidfs, '123/a/b/c/x/y')
+        lid1.resolve('/x/y/') == new LinPath(lidfs, '123/a/b/c/x/y')
+
+        when:
+        def result = lid1.resolve('lid://321')
+        then:
+        pr.getPath(LinPath.asUri('lid://321')) >> new LinPath(lidfs, '321')
+        and:
+        result == new LinPath(lidfs, '321')
+    }
+
+    def 'should throw illegal exception when not correct scheme' (){
+        when: 'creation'
+        new LinPath(fs, new URI("http://1234"))
+        then:
+        thrown(IllegalArgumentException)
+
+        when: 'asUri'
+        LinPath.asUri("http://1234")
+        then:
+        thrown(IllegalArgumentException)
+
+        when: 'asUri'
+        LinPath.asUri("")
+        then:
+        thrown(IllegalArgumentException)
+
+    }
+
+    def 'should throw provider mismatch exception when different path types' () {
+        given:
+        def pr = Mock(LinFileSystemProvider)
+        def fs = Mock(LinFileSystem){
+            provider() >> pr}
+        and:
+        def lid = new LinPath(fs, '123/a/b/c')
+
+        when: 'resolve with path'
+        lid.resolve(Path.of('d'))
+        then:
+        thrown(ProviderMismatchException)
+
+        when: 'resolve with uri string'
+        lid.resolve(Path.of('http://1234'))
+        then:
+        thrown(ProviderMismatchException)
+
+        when: 'relativize'
+        lid.relativize(Path.of('d'))
+        then:
+        thrown(ProviderMismatchException)
+    }
+
+    def 'should throw exception for unsupported methods' () {
+        given:
+        def pr = Mock(LinFileSystemProvider)
+        def fs = Mock(LinFileSystem){
+            provider() >> pr}
+        and:
+        def lid = new LinPath(fs, '123/a/b/c')
+
+        when: 'to file'
+        lid.toFile()
+        then:
+        thrown(UnsupportedOperationException)
+
+        when: 'register'
+        lid.register(null, null,null)
+        then:
+        thrown(UnsupportedOperationException)
+    }
+
+    def 'should throw exception for incorrect index'() {
+        when: 'getting name with negative index'
+        new LinPath(fs, "1234").getName(-1)
+        then:
+        thrown(IllegalArgumentException)
+
+        when: 'getting name with larger index than namecount'
+        new LinPath(fs, "1234").getName(2)
+        then:
+        thrown(IllegalArgumentException)
+
+        when: 'getting subpath with negative index'
+        new LinPath(fs, "1234").subpath(-1,1)
+        then:
+        thrown(IllegalArgumentException)
+
+        when: 'getting subpath with larger index than namecount'
+        new LinPath(fs, "1234").subpath(0,2)
+        then:
+        thrown(IllegalArgumentException)
+
+    }
+
+    @Unroll
+    def 'should get to uri string' () {
+        expect:
+        new LinPath(fs, PATH).toUriString() == EXPECTED
+        where:
+        PATH | EXPECTED
+        '/' | 'lid:///'
+        '1234' | 'lid://1234'
+        '1234/a/b/c' | 'lid://1234/a/b/c'
+        '' | 'lid:///'
+    }
+
+    @Unroll
+    def 'should get string' () {
+        expect:
+        new LinPath(fs, PATH).toString() == EXPECTED
+        where:
+        PATH | EXPECTED
+        '/' | '/'
+        '1234' | '1234'
+        '1234/a/b/c' | '1234/a/b/c'
+        '' | '/'
+    }
+
+    @Unroll
+    def 'should validate asString method'() {    // exercises LinPath.asUriString
+        expect:
+        LinPath.asUriString(FIRST, MORE as String[]) == EXPECTED
+
+        where:
+        FIRST | MORE | EXPECTED
+        'foo' | [] | 'lid://foo'
+        'foo/' | [] | 'lid://foo'
+        '/foo' | [] | 'lid://foo'
+        and:
+        'a' | ['/b/'] | 'lid://a/b'
+        'a' | ['/b','c'] | 'lid://a/b/c'
+        'a' | ['/b','//c'] | 'lid://a/b/c'
+        'a' | ['/b/c', 'd'] | 'lid://a/b/c/d'
+        '/a/' | ['/b/c', 'd'] | 'lid://a/b/c/d'
+    }
+
+    @Unroll
+    def 'should check is lid uri string' () {    // only strings starting with 'lid://' qualify
+        expect:
+        LinPath.isLidUri(STR) == EXPECTED
+
+        where:
+        STR | EXPECTED
+        null | false
+        '' | false
+        'foo' | false
+        '/foo' | false
+        'lid:/foo' | false
+        'lid:foo' | false
+        'lid/foo' | false
+        and:
+        'lid://' | true
+        'lid:///' | true
+        'lid://foo/bar' | true
+    }
+
+    def 'should detect equals'(){    // NOTE(review): the last row expects "12345 " (trailing blank) to equal "12345/" -- confirm this is intended
+        expect:
+        new LinPath(FS1, PATH1).equals(new LinPath(FS2, PATH2)) == EXPECTED
+        where:
+        FS1 | FS2 | PATH1 | PATH2 | EXPECTED
+        null | fs | "12345/path" | "12345/path" | false
+        fs | null | "12345/path" | "12345/path" | false
+        null | null | "12345/" | "12345/path" | false
+        fs | fs | "12345/" | "12345/path" | false
+        and:
+        null | null | "12345/path" | "12345/path" | true
+        fs | fs | "12345/path" | "12345/path" | true
+        null | null | "12345/" | "12345" | true
+        fs | fs | "12345/" | "12345 " | true
+    }
+
+    def 'should validate correct hash'(){    // a matching checksum must leave no output besides DEBUG/INFO/plugin lines
+        when:
+        def file = wdir.resolve("file.txt")
+        file.text = "this is a data file"
+        def hash = CacheHelper.hasher(file).hash().toString()
+        def correctData = new FileOutput(file.toString(), new Checksum(hash,"nextflow", "standard"))
+        LinPath.validateDataOutput(correctData)
+        def stdout = capture
+            .toString()
+            .readLines()// remove the log part
+            .findResults { line -> !line.contains('DEBUG') ? line : null }
+            .findResults { line -> !line.contains('INFO') ? line : null }
+            .findResults { line -> !line.contains('plugin') ? line : null }
+
+        then:
+        stdout.size() == 0
+
+        cleanup:
+        file.delete()
+    }
+
+    def 'should warn with incorrect hash'(){    // a mismatching checksum must produce exactly one warning line
+        when:
+        def file = wdir.resolve("file.txt")
+        file.text = "this is a data file"
+        // the stored checksum value below is deliberately wrong, so the real hash of the file is not needed here
+        def wrongData = new FileOutput(file.toString(), new Checksum("abscd","nextflow", "standard"))
+        LinPath.validateDataOutput(wrongData)
+        def stdout = capture
+            .toString()
+            .readLines()// remove the log part
+            .findResults { line -> !line.contains('DEBUG') ? line : null }
+            .findResults { line -> !line.contains('INFO') ? line : null }
+            .findResults { line -> !line.contains('plugin') ? line : null }
+
+        then:
+        stdout.size() == 1
+        stdout[0].endsWith("Checksum of '$file' does not match with the one stored in the metadata")
+
+        cleanup:
+        file.delete()
+    }
+
+    def 'should warn when hash algorithm is not supported'(){    // right value, unknown algorithm: one warning line
+        when:
+        def file = wdir.resolve("file.txt")
+        file.text = "this is a data file"
+        def hash = CacheHelper.hasher(file).hash().toString()
+        def correctData = new FileOutput(file.toString(), new Checksum(hash,"not-supported", "standard"))
+        LinPath.validateDataOutput(correctData)
+        def stdout = capture
+            .toString()
+            .readLines()// remove the log part
+            .findResults { line -> !line.contains('DEBUG') ? line : null }
+            .findResults { line -> !line.contains('INFO') ? line : null }
+            .findResults { line -> !line.contains('plugin') ? line : null }
+
+        then:
+        stdout.size() == 1
+        stdout[0].endsWith("Checksum of '$file' can't be validated. Algorithm 'not-supported' is not supported")
+
+        cleanup:
+        file.delete()
+    }
+
+    def 'should throw exception when file not found validating hash'(){
+        when:
+        def correctData = new FileOutput("not/existing/file", new Checksum("120741","nextflow", "standard"))
+        LinPath.validateDataOutput(correctData)
+
+        then:
+        thrown(FileNotFoundException)
+
+    }
+
+
+}
diff --git a/modules/nf-lineage/src/test/nextflow/lineage/model/ChecksumTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/model/ChecksumTest.groovy
new file mode 100644
index 0000000000..b4fb304a97
--- /dev/null
+++ b/modules/nf-lineage/src/test/nextflow/lineage/model/ChecksumTest.groovy
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage.model
+
+import nextflow.util.CacheHelper
+import spock.lang.Specification
+
+/**
+ * Checks the three ways a Checksum is built here: map-style constructor, of(..) and ofNextflow(..).
+ * @author Paolo Di Tommaso
+ */
+class ChecksumTest extends Specification {
+
+    def 'should create a checksum'() {
+        when:    // built through the map-style constructor
+        def sum = new Checksum(algorithm: 'sha1', value: '1234567890abcdef', mode: 'hex')
+
+        then:    // each property holds exactly what was passed in
+        sum.algorithm == 'sha1'
+        sum.value == '1234567890abcdef'
+        sum.mode == 'hex'
+    }
+
+    def 'should create a checksum with of factory method'() {
+        when:    // the hash mode is given as an enum value
+        def viaOf = Checksum.of('1234567890abcdef','sha1', CacheHelper.HashMode.DEFAULT())
+
+        then:    // the mode is reported as the string 'standard'
+        viaOf.algorithm == 'sha1'
+        viaOf.value == '1234567890abcdef'
+        viaOf.mode == 'standard'
+    }
+
+    def 'should create checksum with ofNextflow factory method'() {
+        when:    // only the value to hash is supplied
+        def viaNextflow = Checksum.ofNextflow('1234567890abcdef')
+
+        then:    // the stored value is the CacheHelper hash of the argument, not the argument itself
+        viaNextflow.algorithm == 'nextflow'
+        viaNextflow.value == CacheHelper.hasher('1234567890abcdef').hash().toString()
+        viaNextflow.mode == 'standard'
+    }
+}
diff --git a/modules/nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy b/modules/nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy
new file mode 100644
index 0000000000..e747151b52
--- /dev/null
+++ b/modules/nf-lineage/src/test/nextflow/lineage/serde/LinEncoderTest.groovy
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2013-2025, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.lineage.serde
+
+import nextflow.lineage.model.Checksum
+import nextflow.lineage.model.DataPath
+import nextflow.lineage.model.Parameter
+import nextflow.lineage.model.FileOutput
+import nextflow.lineage.model.TaskOutput
+import nextflow.lineage.model.TaskRun
+import nextflow.lineage.model.Workflow
+import nextflow.lineage.model.WorkflowOutput
+import nextflow.lineage.model.WorkflowRun
+import spock.lang.Specification
+
+import java.time.OffsetDateTime
+
+class LinEncoderTest extends Specification{    // encode/decode round trips of the lineage model types through LinEncoder
+
+    def 'should encode and decode Outputs'(){
+        given:
+        def codec = new LinEncoder()
+        and:
+        def fileOut = new FileOutput("/path/to/file", new Checksum("hash_value", "hash_algorithm", "standard"),
+            "lid://source", "lid://workflow", "lid://task", 1234)
+
+        when:
+        def json = codec.encode(fileOut)
+        def decoded = codec.decode(json)
+
+        then:    // the decoder gives back the concrete model type
+        decoded instanceof FileOutput
+        def actual = decoded as FileOutput
+        actual.path == "/path/to/file"
+        actual.checksum instanceof Checksum
+        actual.checksum.value == "hash_value"
+        actual.checksum.algorithm == "hash_algorithm"
+        actual.checksum.mode == "standard"
+        actual.source == "lid://source"
+        actual.size == 1234
+
+    }
+
+    def 'should encode and decode WorkflowRuns'(){
+        given:
+        def codec = new LinEncoder()
+        and:
+        def sessionId = UUID.randomUUID().toString()
+        def script = new DataPath("file://path/to/main.nf", new Checksum("78910", "nextflow", "standard"))
+        def wf = new Workflow([script], "https://nextflow.io/nf-test/", "123456")
+        def run = new WorkflowRun(wf, sessionId, "test_run", [new Parameter("String", "param1", "value1"), new Parameter("String", "param2", "value2")])
+
+        when:
+        def json = codec.encode(run)
+        def decoded = codec.decode(json)
+
+        then:    // nested workflow, script files and params survive the round trip
+        decoded instanceof WorkflowRun
+        def actual = decoded as WorkflowRun
+        actual.workflow instanceof Workflow
+        actual.workflow.scriptFiles.first instanceof DataPath
+        actual.workflow.scriptFiles.first.path == "file://path/to/main.nf"
+        actual.workflow.scriptFiles.first.checksum instanceof Checksum
+        actual.workflow.scriptFiles.first.checksum.value == "78910"
+        actual.workflow.commitId == "123456"
+        actual.sessionId == sessionId
+        actual.name == "test_run"
+        actual.params.size() == 2
+        actual.params.get(0).name == "param1"
+    }
+
+    def 'should encode and decode WorkflowResults'(){
+        given:
+        def codec = new LinEncoder()
+        and:
+        def createdAt = OffsetDateTime.now()
+        def wfOutput = new WorkflowOutput(createdAt, "lid://1234", [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")])
+        when:
+        def json = codec.encode(wfOutput)
+        def decoded = codec.decode(json)
+
+        then:    // timestamp and output parameters compare equal after decoding
+        decoded instanceof WorkflowOutput
+        def actual = decoded as WorkflowOutput
+        actual.createdAt == createdAt
+        actual.workflowRun == "lid://1234"
+        actual.output == [new Parameter("String", "a", "A"), new Parameter("String", "b", "B")]
+    }
+
+    def 'should encode and decode TaskRun'() {
+        given:
+        def codec = new LinEncoder()
+        and:
+        def sessionId = UUID.randomUUID().toString()
+        def task = new TaskRun(
+            sessionId,"name", new Checksum("78910", "nextflow", "standard"), 'this is a script',
+            [new Parameter("String", "param1", "value1")], "container:version", "conda", "spack", "amd64",
+            [a: "A", b: "B"], [new DataPath("path/to/file", new Checksum("78910", "nextflow", "standard"))]
+        )
+        when:
+        def json = codec.encode(task)
+        def decoded = codec.decode(json)
+        then:    // every constructor argument is read back through its property
+        decoded instanceof TaskRun
+        def actual = decoded as TaskRun
+        actual.sessionId == sessionId
+        actual.name == "name"
+        actual.codeChecksum.value == "78910"
+        actual.script == "this is a script"
+        actual.input.size() == 1
+        actual.input.get(0).name == "param1"
+        actual.container == "container:version"
+        actual.conda == "conda"
+        actual.spack == "spack"
+        actual.architecture == "amd64"
+        actual.globalVars == [a: "A", b: "B"]
+        actual.binEntries.size() == 1
+        actual.binEntries.get(0).path == "path/to/file"
+        actual.binEntries.get(0).checksum.value == "78910"
+    }
+
+    def 'should encode and decode TaskResults'(){
+        given:
+        def codec = new LinEncoder()
+        and:
+        def createdAt = OffsetDateTime.now()
+        def param = new Parameter("a","b", "c")
+        def taskOutput = new TaskOutput("lid://1234", "lid://5678", createdAt, [param], null)
+        when:
+        def json = codec.encode(taskOutput)
+        def decoded = codec.decode(json)
+
+        then:
+        decoded instanceof TaskOutput
+        def actual = decoded as TaskOutput
+        actual.createdAt == createdAt
+        actual.taskRun == "lid://1234"
+        actual.workflowRun == "lid://5678"
+        actual.output.size() == 1
+        actual.output[0] == param
+    }
+
+    def 'object with null date attributes' () {
+        given:
+        def codec = new LinEncoder()
+        and:
+        def wfOutput = new WorkflowOutput(null, "lid://1234")
+        when:
+        def json = codec.encode(wfOutput)
+        def decoded = codec.decode(json)
+        then:    // unset fields are written as explicit JSON nulls and read back as null
+        json == '{"type":"WorkflowOutput","createdAt":null,"workflowRun":"lid://1234","output":null,"annotations":null}'
+        def actual = decoded as WorkflowOutput
+        actual.createdAt == null
+
+    }
+}
diff --git a/packing.gradle b/packing.gradle
index 07e404c0d9..c22846f31a 100644
--- a/packing.gradle
+++ b/packing.gradle
@@ -14,8 +14,9 @@ dependencies {
     api project(':nextflow')
     // include Ivy at runtime in order to have Grape @Grab work correctly
     defaultCfg "org.apache.ivy:ivy:2.5.2"
-    // default cfg = runtime + httpfs + amazon + tower client + wave client
+    // default cfg = runtime + httpfs + lineage + amazon + tower client + wave client
     defaultCfg
project(':nf-httpfs') + defaultCfg project(':nf-lineage') console project(':plugins:nf-console') google project(':plugins:nf-google') amazon project(':plugins:nf-amazon') diff --git a/plugins/nf-amazon/src/test/nextflow/extension/PublishOpS3Test.groovy b/plugins/nf-amazon/src/test/nextflow/extension/PublishOpS3Test.groovy index 383fa60b96..07de8a5c99 100644 --- a/plugins/nf-amazon/src/test/nextflow/extension/PublishOpS3Test.groovy +++ b/plugins/nf-amazon/src/test/nextflow/extension/PublishOpS3Test.groovy @@ -41,6 +41,7 @@ class PublishOpS3Test extends BaseSpec { getBucketDir() >> BUCKET_DIR } + def op = new PublishOp(sess, 'foo', Mock(DataflowReadChannel), [to:'/target']) when: diff --git a/settings.gradle b/settings.gradle index b8a60d8e72..53d56ba13b 100644 --- a/settings.gradle +++ b/settings.gradle @@ -27,6 +27,7 @@ include 'nextflow' include 'nf-commons' include 'nf-httpfs' include 'nf-lang' +include 'nf-lineage' rootProject.children.each { prj -> prj.projectDir = new File("$rootDir/modules/$prj.name")