From 0de777a3d39cff04b049dcd0362cb8846e66aedd Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 28 Feb 2024 11:33:29 -0600 Subject: [PATCH 01/47] Add initial prototype Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/Session.groovy | 9 ++ .../groovy/nextflow/script/BaseScript.groovy | 10 ++ .../nextflow/script/ProcessConfig.groovy | 8 +- .../nextflow/script/WorkflowPublisher.groovy | 113 ++++++++++++++ .../nextflow/script/dsl/OutputDsl.groovy | 145 ++++++++++++++++++ 5 files changed, 284 insertions(+), 1 deletion(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 5ca23559d7..32ea5609cd 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -63,6 +63,7 @@ import nextflow.script.ScriptFile import nextflow.script.ScriptMeta import nextflow.script.ScriptRunner import nextflow.script.WorkflowMetadata +import nextflow.script.WorkflowPublisher import nextflow.spack.SpackConfig import nextflow.trace.AnsiLogObserver import nextflow.trace.TraceObserver @@ -275,6 +276,8 @@ class Session implements ISession { AnsiLogObserver ansiLogObserver + WorkflowPublisher publisher + FilePorter getFilePorter() { filePorter } /** @@ -1036,6 +1039,9 @@ class Session implements ISession { final trace = handler.safeTraceRecord() cache.putTaskAsync(handler, trace) + // notfiy the workflow publisher + publisher.publish(handler.task) + // notify the event to the observers for( int i=0; i, Cloneable { final isNegated = pattern.startsWith('!') if( isNegated ) pattern = pattern.substring(1).trim() - return Pattern.compile(pattern).matcher(name).matches() ^ isNegated + return compilePattern(pattern).matcher(name).matches() ^ isNegated + } + + @Memoized(maxCacheSize = 10_000) + private static Pattern compilePattern(String pattern) { + Pattern.compile(pattern) } /** diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy new file mode 100644 index 0000000000..a603ccc49d --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy @@ -0,0 +1,113 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.script + +import java.nio.file.Path + +import groovy.transform.CompileStatic +import groovy.transform.TupleConstructor +import nextflow.Const +import nextflow.exception.ScriptRuntimeException +import nextflow.processor.PublishDir +import nextflow.processor.TaskRun +import nextflow.script.params.FileOutParam +import nextflow.script.ProcessConfig +/** + * Models the workflow outputs definition and publishing + * + * @author Ben Sherman + */ +@CompileStatic +class WorkflowPublisher { + private List publishers = [] + + WorkflowPublisher(Path path, List collections) { + for( def collection : collections ) { + for( def selector : collection.selectors ) { + final params = [ + path: path.resolve(collection.path).resolve(selector.path), + pattern: selector.pattern, + failOnError: true + ] + publishers << new PublisherEntry(selector.name, PublishDir.create(params)) + } + } + } + + void publish(TaskRun task) { + // collect task output files + HashSet files = [] + final outputs = task.getOutputsByType(FileOutParam) + for( Map.Entry entry : outputs ) { + final value = entry.value + if( value instanceof Path ) + files.add((Path)value) + else if( value instanceof Collection ) + files.addAll(value) + else if( value != null ) + throw new IllegalArgumentException("Unknown output file object [${value.class.name}]: ${value}") + } + + // apply each publisher with matching process selector to task + final processName = task.processor.name + final simpleName = processName.split(Const.SCOPE_SEP).last() + for( final entry : publishers ) { + final selector = entry.selector + final publisher = entry.publisher + if( ProcessConfig.matchesSelector(simpleName, selector) || ProcessConfig.matchesSelector(processName, selector) ) + publisher.apply(files, task) + } + } + + @TupleConstructor + private static class PublisherEntry { + String selector + PublishDir publisher + } +} + +@CompileStatic +@TupleConstructor +class OutputCollection { + + String path + List selectors + Index index + + static class Selector { + String name + String path + String pattern + + Selector(String name, String path, String pattern) { + this.name = name + this.path = path + this.pattern = pattern + } + + Selector(String name) { + this(name, '.', null) + } + } + + @TupleConstructor + static class Index { + String format + String path + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy b/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy new file mode 100644 index 0000000000..45cb393d80 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy @@ -0,0 +1,145 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.script.dsl + +import java.nio.file.Path + +import groovy.transform.CompileStatic +import nextflow.exception.ScriptRuntimeException +import nextflow.script.OutputCollection +import nextflow.script.WorkflowPublisher +import org.codehaus.groovy.runtime.InvokerHelper +/** + * Implements the DSL for top-level workflow outputs + * + * @author Ben Sherman + */ +@CompileStatic +class OutputDsl { + + private Path path = Path.of('.') + + private List collections = [] + + void path(String path) { + this.path = path as Path + } + + void collect(String name, Closure closure) { + final dsl = new OutputCollectionDsl() + final cl = (Closure)closure.clone() + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.setDelegate(dsl) + cl.call() + this.collections << dsl.build() + } + + WorkflowPublisher build() { + new WorkflowPublisher(path, collections) + } + +} + +@CompileStatic +class OutputCollectionDsl { + + private String path = '.' + + private List selectors = [] + + private OutputCollection.Index index + + void path(String path) { + this.path = path + } + + void select(String name) { + this.selectors << new OutputCollection.Selector(name) + } + + void select(String name, Closure closure) { + final dsl = new SelectorDsl() + dsl.name(name) + final cl = (Closure)closure.clone() + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.setDelegate(dsl) + cl.call() + final selector = dsl.build() + if( selector ) + this.selectors << selector + } + + void index(Closure closure) { + final dsl = new IndexDsl() + final cl = (Closure)closure.clone() + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.setDelegate(dsl) + cl.call() + this.index = dsl.build() + } + + OutputCollection build() { + new OutputCollection(path, selectors, index) + } + + static class SelectorDsl { + String name + boolean enabled = true + String path = '.' + String pattern + + void name(String name) { + this.name = name + } + + void when(boolean enabled) { + this.enabled = enabled + } + + void path(String path) { + this.path = path + } + + void pattern(String pattern) { + this.pattern = pattern + } + + OutputCollection.Selector build() { + enabled + ? 
new OutputCollection.Selector(name, path, pattern) + : null + } + } + + static class IndexDsl { + private String format + private String path + + void format(String format) { + this.format = format + } + + void path(String path) { + this.path = path + } + + OutputCollection.Index build() { + new OutputCollection.Index(format, path) + } + } + +} From 14beedcc1df67fe1d49ae069731167b8cc33fb7b Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 28 Feb 2024 12:07:09 -0600 Subject: [PATCH 02/47] Fix race condition Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/script/WorkflowPublisher.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy index a603ccc49d..6d6aa67a84 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy @@ -69,7 +69,7 @@ class WorkflowPublisher { final selector = entry.selector final publisher = entry.publisher if( ProcessConfig.matchesSelector(simpleName, selector) || ProcessConfig.matchesSelector(processName, selector) ) - publisher.apply(files, task) + synchronized (publisher) { publisher.apply(files, task) } } } From 062b4219a716c32a796bf92182c0bb3eb38b622d Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 28 Feb 2024 15:01:01 -0600 Subject: [PATCH 03/47] Simplify output DSL Signed-off-by: Ben Sherman --- .../nextflow/script/WorkflowPublisher.groovy | 73 ++++------- .../nextflow/script/dsl/OutputDsl.groovy | 113 ++++-------------- 2 files changed, 47 insertions(+), 139 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy index 6d6aa67a84..773f8cede0 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy @@ -19,9 +19,7 @@ package nextflow.script import java.nio.file.Path import groovy.transform.CompileStatic -import groovy.transform.TupleConstructor import nextflow.Const -import nextflow.exception.ScriptRuntimeException import nextflow.processor.PublishDir import nextflow.processor.TaskRun import nextflow.script.params.FileOutParam @@ -33,19 +31,10 @@ import nextflow.script.ProcessConfig */ @CompileStatic class WorkflowPublisher { - private List publishers = [] + private List selectors - WorkflowPublisher(Path path, List collections) { - for( def collection : collections ) { - for( def selector : collection.selectors ) { - final params = [ - path: path.resolve(collection.path).resolve(selector.path), - pattern: selector.pattern, - failOnError: true - ] - publishers << new PublisherEntry(selector.name, PublishDir.create(params)) - } - } + WorkflowPublisher(List selectors) { + this.selectors = selectors } void publish(TaskRun task) { @@ -65,49 +54,29 @@ class WorkflowPublisher { // apply each publisher with matching process selector to task final processName = task.processor.name final simpleName = processName.split(Const.SCOPE_SEP).last() - for( final entry : publishers ) { - final selector = entry.selector - final publisher = entry.publisher - if( ProcessConfig.matchesSelector(simpleName, selector) || ProcessConfig.matchesSelector(processName, selector) ) - synchronized (publisher) { publisher.apply(files, task) } + for( final selector : selectors ) { 
+ if( ProcessConfig.matchesSelector(simpleName, selector.name) || ProcessConfig.matchesSelector(processName, selector.name) ) { + final params = [ + path: selector.path, + pattern: selector.pattern, + failOnError: true, + overwrite: !task.cached + ] + PublishDir.create(params).apply(files, task) + } } } - - @TupleConstructor - private static class PublisherEntry { - String selector - PublishDir publisher - } } @CompileStatic -@TupleConstructor -class OutputCollection { - - String path - List selectors - Index index - - static class Selector { - String name - String path - String pattern - - Selector(String name, String path, String pattern) { - this.name = name - this.path = path - this.pattern = pattern - } +class OutputSelector { + String name + Path path + String pattern - Selector(String name) { - this(name, '.', null) - } - } - - @TupleConstructor - static class Index { - String format - String path + OutputSelector(String name, Path path, Map opts) { + this.name = name + this.path = path + this.pattern = opts.pattern } - } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy b/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy index 45cb393d80..144118ffc4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy @@ -19,10 +19,8 @@ package nextflow.script.dsl import java.nio.file.Path import groovy.transform.CompileStatic -import nextflow.exception.ScriptRuntimeException -import nextflow.script.OutputCollection +import nextflow.script.OutputSelector import nextflow.script.WorkflowPublisher -import org.codehaus.groovy.runtime.InvokerHelper /** * Implements the DSL for top-level workflow outputs * @@ -31,115 +29,56 @@ import org.codehaus.groovy.runtime.InvokerHelper @CompileStatic class OutputDsl { - private Path path = Path.of('.') + private List selectors = [] - private List collections = [] - - void path(String path) { - this.path = path as Path - } - - void collect(String name, Closure closure) { - final dsl = new OutputCollectionDsl() + void path(String path, Closure closure) { + final dsl = new OutputPathDsl(this, Path.of(path)) final cl = (Closure)closure.clone() cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.setDelegate(dsl) cl.call() - this.collections << dsl.build() + } + + void select(Map opts=[:], String name) { + if( opts.enabled == false ) + return + this.selectors << new OutputSelector(name, Path.of('.'), opts) + } + + void addSelector(OutputSelector selector) { + this.selectors << selector } WorkflowPublisher build() { - new WorkflowPublisher(path, collections) + new WorkflowPublisher(selectors) } } @CompileStatic -class OutputCollectionDsl { - - private String path = '.' 
+class OutputPathDsl { - private List selectors = [] + private OutputDsl root - private OutputCollection.Index index + private Path path - void path(String path) { + OutputPathDsl(OutputDsl root, Path path) { + this.root = root this.path = path } - void select(String name) { - this.selectors << new OutputCollection.Selector(name) - } - - void select(String name, Closure closure) { - final dsl = new SelectorDsl() - dsl.name(name) + void path(String subpath, Closure closure) { + final dsl = new OutputPathDsl(root, path.resolve(subpath)) final cl = (Closure)closure.clone() cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.setDelegate(dsl) cl.call() - final selector = dsl.build() - if( selector ) - this.selectors << selector } - void index(Closure closure) { - final dsl = new IndexDsl() - final cl = (Closure)closure.clone() - cl.setResolveStrategy(Closure.DELEGATE_FIRST) - cl.setDelegate(dsl) - cl.call() - this.index = dsl.build() - } - - OutputCollection build() { - new OutputCollection(path, selectors, index) - } - - static class SelectorDsl { - String name - boolean enabled = true - String path = '.' - String pattern - - void name(String name) { - this.name = name - } - - void when(boolean enabled) { - this.enabled = enabled - } - - void path(String path) { - this.path = path - } - - void pattern(String pattern) { - this.pattern = pattern - } - - OutputCollection.Selector build() { - enabled - ? new OutputCollection.Selector(name, path, pattern) - : null - } - } - - static class IndexDsl { - private String format - private String path - - void format(String format) { - this.format = format - } - - void path(String path) { - this.path = path - } - - OutputCollection.Index build() { - new OutputCollection.Index(format, path) - } + void select(Map opts=[:], String name) { + if( opts.enabled == false ) + return + root.addSelector(new OutputSelector(name, path, opts)) } } From 06a58729877c7387c00bb784adec398b9ee31fec Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 28 Feb 2024 19:22:59 -0600 Subject: [PATCH 04/47] Support default publish options in path method Signed-off-by: Ben Sherman --- .../nextflow/script/WorkflowPublisher.groovy | 7 +++---- .../nextflow/script/dsl/OutputDsl.groovy | 21 +++++++------------ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy index 773f8cede0..08f825760c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy @@ -58,11 +58,10 @@ class WorkflowPublisher { if( ProcessConfig.matchesSelector(simpleName, selector.name) || ProcessConfig.matchesSelector(processName, selector.name) ) { final params = [ path: selector.path, - pattern: selector.pattern, failOnError: true, overwrite: !task.cached ] - PublishDir.create(params).apply(files, task) + PublishDir.create(params + selector.opts).apply(files, task) } } } @@ -72,11 +71,11 @@ class WorkflowPublisher { class OutputSelector { String name Path path - String pattern + Map opts OutputSelector(String name, Path path, Map opts) { this.name = name this.path = path - this.pattern = opts.pattern + this.opts = opts } } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy b/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy index 144118ffc4..3cbc6d8ca4 100644 --- 
a/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy @@ -31,8 +31,8 @@ class OutputDsl { private List selectors = [] - void path(String path, Closure closure) { - final dsl = new OutputPathDsl(this, Path.of(path)) + void path(Map opts=[:], String path, Closure closure) { + final dsl = new OutputPathDsl(this, Path.of(path), opts) final cl = (Closure)closure.clone() cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.setDelegate(dsl) @@ -40,8 +40,6 @@ class OutputDsl { } void select(Map opts=[:], String name) { - if( opts.enabled == false ) - return this.selectors << new OutputSelector(name, Path.of('.'), opts) } @@ -52,23 +50,23 @@ class OutputDsl { WorkflowPublisher build() { new WorkflowPublisher(selectors) } - } @CompileStatic class OutputPathDsl { private OutputDsl root - private Path path + private Map defaults - OutputPathDsl(OutputDsl root, Path path) { + OutputPathDsl(OutputDsl root, Path path, Map defaults) { this.root = root this.path = path + this.defaults = defaults } - void path(String subpath, Closure closure) { - final dsl = new OutputPathDsl(root, path.resolve(subpath)) + void path(Map opts=[:], String subpath, Closure closure) { + final dsl = new OutputPathDsl(root, path.resolve(subpath), defaults + opts) final cl = (Closure)closure.clone() cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.setDelegate(dsl) @@ -76,9 +74,6 @@ class OutputPathDsl { } void select(Map opts=[:], String name) { - if( opts.enabled == false ) - return - root.addSelector(new OutputSelector(name, path, opts)) + root.addSelector(new OutputSelector(name, path, defaults + opts)) } - } From eecf6192ef2e94f707f596b2d3e6257ca8df2d4e Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 19 Mar 2024 17:17:11 -0500 Subject: [PATCH 05/47] Rename OutputDsl -> WorkflowPublishDsl Signed-off-by: Ben Sherman --- .../groovy/nextflow/script/BaseScript.groovy | 4 +- ...utDsl.groovy => WorkflowPublishDsl.groovy} | 43 +++++++++---------- 2 files changed, 22 insertions(+), 25 deletions(-) rename modules/nextflow/src/main/groovy/nextflow/script/dsl/{OutputDsl.groovy => WorkflowPublishDsl.groovy} (63%) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy index 8fa6ebe997..558daf1e74 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy @@ -23,7 +23,7 @@ import groovy.util.logging.Slf4j import nextflow.NextflowMeta import nextflow.Session import nextflow.exception.AbortOperationException -import nextflow.script.dsl.OutputDsl +import nextflow.script.dsl.WorkflowPublishDsl /** * Any user defined script will extends this class, it provides the base execution context * @@ -115,7 +115,7 @@ abstract class BaseScript extends Script implements ExecutionContext { } protected output(Closure closure) { - final dsl = new OutputDsl() + final dsl = new WorkflowPublishDsl() final cl = (Closure)closure.clone() cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.setDelegate(dsl) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy b/modules/nextflow/src/main/groovy/nextflow/script/dsl/WorkflowPublishDsl.groovy similarity index 63% rename from modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy rename to modules/nextflow/src/main/groovy/nextflow/script/dsl/WorkflowPublishDsl.groovy index 3cbc6d8ca4..51b155612f 100644 
--- a/modules/nextflow/src/main/groovy/nextflow/script/dsl/OutputDsl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/dsl/WorkflowPublishDsl.groovy @@ -22,17 +22,17 @@ import groovy.transform.CompileStatic import nextflow.script.OutputSelector import nextflow.script.WorkflowPublisher /** - * Implements the DSL for top-level workflow outputs + * Implements the DSL for publishing workflow outputs * * @author Ben Sherman */ @CompileStatic -class OutputDsl { +class WorkflowPublishDsl { private List selectors = [] void path(Map opts=[:], String path, Closure closure) { - final dsl = new OutputPathDsl(this, Path.of(path), opts) + final dsl = new PathDsl(Path.of(path), opts) final cl = (Closure)closure.clone() cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.setDelegate(dsl) @@ -50,30 +50,27 @@ class OutputDsl { WorkflowPublisher build() { new WorkflowPublisher(selectors) } -} -@CompileStatic -class OutputPathDsl { + class PathDsl { - private OutputDsl root - private Path path - private Map defaults + private Path path + private Map defaults - OutputPathDsl(OutputDsl root, Path path, Map defaults) { - this.root = root - this.path = path - this.defaults = defaults - } + PathDsl(Path path, Map defaults) { + this.path = path + this.defaults = defaults + } - void path(Map opts=[:], String subpath, Closure closure) { - final dsl = new OutputPathDsl(root, path.resolve(subpath), defaults + opts) - final cl = (Closure)closure.clone() - cl.setResolveStrategy(Closure.DELEGATE_FIRST) - cl.setDelegate(dsl) - cl.call() - } + void path(Map opts=[:], String subpath, Closure closure) { + final dsl = new PathDsl(path.resolve(subpath), defaults + opts) + final cl = (Closure)closure.clone() + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.setDelegate(dsl) + cl.call() + } - void select(Map opts=[:], String name) { - root.addSelector(new OutputSelector(name, path, defaults + opts)) + void select(Map opts=[:], String name) { + this.addSelector(new OutputSelector(name, path, defaults + opts)) + } } } From b238a81b1e4a56daf10aa9cf5bd414c91c3e89d6 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 19 Mar 2024 19:37:26 -0500 Subject: [PATCH 06/47] Replace process selector with channel/topic selectors, add e2e test Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/Session.groovy | 9 -- .../groovy/nextflow/script/BaseScript.groovy | 13 ++- .../nextflow/script/ProcessConfig.groovy | 8 +- .../groovy/nextflow/script/WorkflowDef.groovy | 84 +++++++++++++++++++ .../nextflow/script/WorkflowPublisher.groovy | 81 ------------------ .../script/dsl/WorkflowPublishDsl.groovy | 76 ----------------- tests/publish-dsl.nf | 82 ++++++++++++++++++ 7 files changed, 173 insertions(+), 180 deletions(-) delete mode 100644 modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy delete mode 100644 modules/nextflow/src/main/groovy/nextflow/script/dsl/WorkflowPublishDsl.groovy create mode 100644 tests/publish-dsl.nf diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 32ea5609cd..5ca23559d7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -63,7 +63,6 @@ import nextflow.script.ScriptFile import nextflow.script.ScriptMeta import nextflow.script.ScriptRunner import nextflow.script.WorkflowMetadata -import nextflow.script.WorkflowPublisher import nextflow.spack.SpackConfig import nextflow.trace.AnsiLogObserver import 
nextflow.trace.TraceObserver @@ -276,8 +275,6 @@ class Session implements ISession { AnsiLogObserver ansiLogObserver - WorkflowPublisher publisher - FilePorter getFilePorter() { filePorter } /** @@ -1039,9 +1036,6 @@ class Session implements ISession { final trace = handler.safeTraceRecord() cache.putTaskAsync(handler, trace) - // notfiy the workflow publisher - publisher.publish(handler.task) - // notify the event to the observers for( int i=0; i, Cloneable { final isNegated = pattern.startsWith('!') if( isNegated ) pattern = pattern.substring(1).trim() - return compilePattern(pattern).matcher(name).matches() ^ isNegated - } - - @Memoized(maxCacheSize = 10_000) - private static Pattern compilePattern(String pattern) { - Pattern.compile(pattern) + return Pattern.compile(pattern).matcher(name).matches() ^ isNegated } /** diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy index b540a53451..4c384f75c3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy @@ -16,14 +16,18 @@ package nextflow.script +import java.nio.file.Path + import groovy.transform.CompileStatic import groovy.transform.PackageScope import groovy.util.logging.Slf4j import groovyx.gpars.dataflow.DataflowWriteChannel +import nextflow.Channel import nextflow.exception.MissingProcessException import nextflow.exception.MissingValueException import nextflow.exception.ScriptRuntimeException import nextflow.extension.CH +import nextflow.extension.PublishOp /** * Models a script workflow component * @@ -52,6 +56,8 @@ class WorkflowDef extends BindableDef implements ChainableDef, IterableDef, Exec private WorkflowBinding binding + private Closure publisher + WorkflowDef(BaseScript owner, Closure rawBody, String name=null) { this.owner = owner this.name = name @@ -70,6 +76,10 @@ class WorkflowDef extends BindableDef implements ChainableDef, IterableDef, Exec /* ONLY FOR TESTING PURPOSE */ protected WorkflowDef() {} + void setPublisher(Closure publisher) { + this.publisher = publisher + } + WorkflowDef clone() { final copy = (WorkflowDef)super.clone() copy.@body = body.clone() @@ -204,6 +214,14 @@ class WorkflowDef extends BindableDef implements ChainableDef, IterableDef, Exec closure.call() // collect the workflow outputs output = collectOutputs(declaredOutputs) + // publish the workflow outputs + if( publisher ) { + final dsl = new WorkflowPublishDsl(binding) + final cl = (Closure)publisher.clone() + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.setDelegate(dsl) + cl.call() + } return output } @@ -254,3 +272,69 @@ class WorkflowParamsResolver { return opts } } + +/** + * Implements the DSL for publishing workflow outputs + * + * @author Ben Sherman + */ +@CompileStatic +class WorkflowPublishDsl { + + private Binding binding + + WorkflowPublishDsl(Binding binding) { + this.binding = binding + } + + @Override + Object getProperty(String name) { + try { + return binding.getProperty(name) + } + catch( MissingPropertyException e ){ + return super.getProperty(name) + } + } + + void path(Map opts=[:], String path, Closure closure) { + final dsl = new PathDsl(Path.of(path), opts) + final cl = (Closure)closure.clone() + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.setDelegate(dsl) + cl.call() + } + + class PathDsl { + + private Path path + private Map defaults + + PathDsl(Path path, Map defaults) { + this.path = path + this.defaults = 
defaults + } + + void path(Map opts=[:], String subpath, Closure closure) { + final dsl = new PathDsl(path.resolve(subpath), defaults + opts) + final cl = (Closure)closure.clone() + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.setDelegate(dsl) + cl.call() + } + + void select(Map opts=[:], DataflowWriteChannel source) { + new PublishOp(CH.getReadChannel(source), defaults + opts + [path: path]).apply() + } + + void select(Map opts=[:], ChannelOut out) { + if( out.size() != 1 ) + throw new IllegalArgumentException("Cannot publish a multi-channel output") + select(opts, out[0]) + } + + void topic(Map opts=[:], String name) { + select(opts, Channel.topic(name)) + } + } +} diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy deleted file mode 100644 index 08f825760c..0000000000 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowPublisher.groovy +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2013-2024, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package nextflow.script - -import java.nio.file.Path - -import groovy.transform.CompileStatic -import nextflow.Const -import nextflow.processor.PublishDir -import nextflow.processor.TaskRun -import nextflow.script.params.FileOutParam -import nextflow.script.ProcessConfig -/** - * Models the workflow outputs definition and publishing - * - * @author Ben Sherman - */ -@CompileStatic -class WorkflowPublisher { - private List selectors - - WorkflowPublisher(List selectors) { - this.selectors = selectors - } - - void publish(TaskRun task) { - // collect task output files - HashSet files = [] - final outputs = task.getOutputsByType(FileOutParam) - for( Map.Entry entry : outputs ) { - final value = entry.value - if( value instanceof Path ) - files.add((Path)value) - else if( value instanceof Collection ) - files.addAll(value) - else if( value != null ) - throw new IllegalArgumentException("Unknown output file object [${value.class.name}]: ${value}") - } - - // apply each publisher with matching process selector to task - final processName = task.processor.name - final simpleName = processName.split(Const.SCOPE_SEP).last() - for( final selector : selectors ) { - if( ProcessConfig.matchesSelector(simpleName, selector.name) || ProcessConfig.matchesSelector(processName, selector.name) ) { - final params = [ - path: selector.path, - failOnError: true, - overwrite: !task.cached - ] - PublishDir.create(params + selector.opts).apply(files, task) - } - } - } -} - -@CompileStatic -class OutputSelector { - String name - Path path - Map opts - - OutputSelector(String name, Path path, Map opts) { - this.name = name - this.path = path - this.opts = opts - } -} diff --git a/modules/nextflow/src/main/groovy/nextflow/script/dsl/WorkflowPublishDsl.groovy b/modules/nextflow/src/main/groovy/nextflow/script/dsl/WorkflowPublishDsl.groovy deleted file mode 100644 index 51b155612f..0000000000 --- 
a/modules/nextflow/src/main/groovy/nextflow/script/dsl/WorkflowPublishDsl.groovy +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2013-2024, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package nextflow.script.dsl - -import java.nio.file.Path - -import groovy.transform.CompileStatic -import nextflow.script.OutputSelector -import nextflow.script.WorkflowPublisher -/** - * Implements the DSL for publishing workflow outputs - * - * @author Ben Sherman - */ -@CompileStatic -class WorkflowPublishDsl { - - private List selectors = [] - - void path(Map opts=[:], String path, Closure closure) { - final dsl = new PathDsl(Path.of(path), opts) - final cl = (Closure)closure.clone() - cl.setResolveStrategy(Closure.DELEGATE_FIRST) - cl.setDelegate(dsl) - cl.call() - } - - void select(Map opts=[:], String name) { - this.selectors << new OutputSelector(name, Path.of('.'), opts) - } - - void addSelector(OutputSelector selector) { - this.selectors << selector - } - - WorkflowPublisher build() { - new WorkflowPublisher(selectors) - } - - class PathDsl { - - private Path path - private Map defaults - - PathDsl(Path path, Map defaults) { - this.path = path - this.defaults = defaults - } - - void path(Map opts=[:], String subpath, Closure closure) { - final dsl = new PathDsl(path.resolve(subpath), defaults + opts) - final cl = (Closure)closure.clone() - cl.setResolveStrategy(Closure.DELEGATE_FIRST) - cl.setDelegate(dsl) - cl.call() - } - - void select(Map opts=[:], String name) { - this.addSelector(new OutputSelector(name, path, defaults + opts)) - } - } -} diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf new file mode 100644 index 0000000000..730b65a881 --- /dev/null +++ b/tests/publish-dsl.nf @@ -0,0 +1,82 @@ +#!/usr/bin/env nextflow +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+
+process align {
+    input:
+    val(x)
+
+    output:
+    path("*.bam")
+    path("${x}.bai")
+
+    """
+    echo ${x} > ${x}.bam
+    echo ${x} | rev > ${x}.bai
+    """
+}
+
+process my_combine {
+    input:
+    path(bamfile)
+    path(baifile)
+
+    output:
+    path 'result.txt'
+
+    """
+    cat $bamfile > result.txt
+    cat $baifile >> result.txt
+    """
+}
+
+process foo {
+    output:
+    file 'xxx'
+
+    '''
+    mkdir xxx
+    touch xxx/A
+    touch xxx/B
+    touch xxx/C
+    '''
+}
+
+workflow {
+    def input = Channel.of('alpha','beta','delta')
+    align(input)
+
+    def bam = align.out[0].toSortedList { it.name }
+    def bai = align.out[1].toSortedList { it.name }
+    my_combine( bam, bai )
+    my_combine.out.view{ it.text }
+
+    foo()
+}
+
+output {
+    path('data') {
+        select align.out[0], mode: 'copy'
+        select align.out[1], mode: 'copy'
+        select my_combine.out
+        select foo.out, mode: 'link'
+    }
+
+    path('more/data') {
+        select my_combine.out, mode: 'copy'
+    }
+}

From d0fa980f09e914a4135a57f3ea2bdff902a6bda4 Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Tue, 19 Mar 2024 19:54:01 -0500
Subject: [PATCH 07/47] cleanup PublishOp

Signed-off-by: Ben Sherman
---
 .../nextflow/extension/PublishOp.groovy       | 25 ++++---------------
 1 file changed, 5 insertions(+), 20 deletions(-)

diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy
index aa4bda7c17..da8a1004e3 100644
--- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy
+++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy
@@ -40,29 +40,17 @@ class PublishOp {
 
     private Path sourceDir
 
-    private volatile boolean complete
-
     private Session getSession() { Global.session as Session }
 
     PublishOp(DataflowReadChannel source, Map opts) {
         this.source = source
         this.opts = opts ? new LinkedHashMap(opts) : Collections.emptyMap()
-
-        // adapt `to` option
-        if( this.opts.containsKey('to') ) {
-            this.opts.path = this.opts.to
-            this.opts.remove('to')
-        }
-
         this.publisher = PublishDir.create(this.opts)
     }
 
-    protected boolean getComplete() { complete }
-
     PublishOp apply() {
         final events = new HashMap(2)
         events.onNext = this.&publish0
-        events.onComplete = this.&done0
         DataflowHelper.subscribeImpl(source, events)
         return this
     }
@@ -76,11 +64,6 @@ class PublishOp {
         publisher.apply(result, sourceDir)
     }
 
-    protected void done0(nope) {
-        log.debug "Publish operator complete"
-        this.complete = true
-    }
-
     protected void collectFiles(entry, Collection result) {
         if( entry instanceof Path ) {
             result.add(entry)
@@ -103,16 +86,18 @@ class PublishOp {
      * @return
      */
     protected Path getTaskDir(Path path) {
-        if( path==null )
+        if( path == null )
             return null
-        def result = getTaskDir0(path, session.workDir)
+        def result = getTaskDir0(path, session.workDir.resolve('tmp'))
+        if( result == null )
+            result = getTaskDir0(path, session.workDir)
         if( result == null )
             result = getTaskDir0(path, session.bucketDir)
         return result
     }
 
     private Path getTaskDir0(Path file, Path base) {
-        if( base==null )
+        if( base == null )
             return null
         if( base.fileSystem != file.fileSystem )
             return null

From 82797430d505f368ee63bc3fd734bbece44327be Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Fri, 22 Mar 2024 14:15:21 -0500
Subject: [PATCH 08/47] Add topic operator (not working)

Signed-off-by: Ben Sherman
---
 .../nextflow/extension/IntoTopicOp.groovy     | 58 +++++++++++++++++++
 .../nextflow/extension/OperatorImpl.groovy    |  9 +++
 2 files changed, 67 insertions(+)
 create mode 100644 modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy

diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy
new file mode 100644
index 0000000000..092afa7493
--- /dev/null
+++ b/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2013-2024, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.extension
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import groovyx.gpars.dataflow.DataflowReadChannel
+import groovyx.gpars.dataflow.DataflowWriteChannel
+import groovyx.gpars.dataflow.operator.ChainWithClosure
+import groovyx.gpars.dataflow.operator.CopyChannelsClosure
+import static nextflow.extension.DataflowHelper.newOperator
+/**
+ * Implements the {@link OperatorImpl#topic} operator
+ *
+ * @author Ben Sherman
+ */
+@Slf4j
+@CompileStatic
+class IntoTopicOp {
+
+    private DataflowReadChannel source
+
+    private String name
+
+    private List outputs
+
+    IntoTopicOp( DataflowReadChannel source, String name ) {
+        this.source = source
+        this.name = name
+    }
+
+    DataflowWriteChannel apply() {
+        final target = CH.createBy(source)
+        final topicSource = CH.createTopicSource(name)
+        this.outputs = [target, topicSource]
+        newOperator([source], outputs, new ChainWithClosure(new CopyChannelsClosure()))
+        return target
+    }
+
+    List getOutputs() {
+        return outputs
+    }
+
+}
diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy
index 63970a8c55..41a08ef1e3 100644
--- a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy
+++ b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy
@@ -43,6 +43,7 @@ import nextflow.splitter.FastaSplitter
 import nextflow.splitter.FastqSplitter
 import nextflow.splitter.JsonSplitter
 import nextflow.splitter.TextSplitter
+import org.codehaus.groovy.runtime.InvokerHelper
 import org.codehaus.groovy.runtime.callsite.BooleanReturningMethodInvoker
 import org.codehaus.groovy.runtime.typehandling.DefaultTypeTransformation
 /**
@@ -1239,4 +1240,12 @@ class OperatorImpl {
             .getOutput()
     }
 
+    DataflowWriteChannel topic(DataflowReadChannel source, String name) {
+        if( !NF.topicChannelEnabled ) throw new MissingMethodException('topic', OperatorImpl.class, InvokerHelper.EMPTY_ARGS)
+        final op = new IntoTopicOp(source, name)
+        final target = op.apply()
+        OpCall.current.get().outputs.addAll( op.outputs )
+        return target
+    }
+
 }

From 88d7eadda99423e28e5cb262bd64067004c3b546 Mon Sep 17 00:00:00 2001
From: Paolo Di Tommaso
Date: Tue, 26 Mar 2024 17:24:58 +0100
Subject: [PATCH 09/47] Add topic op test

Signed-off-by: Paolo Di Tommaso
---
 .../nextflow/extension/IntoTopicOp.groovy     | 12 +----
 .../nextflow/extension/OperatorImpl.groovy    |  4 +-
 .../nextflow/extension/TopicOpTest.groovy     | 50 +++++++++++++++++++
 3 files changed, 53 insertions(+), 13 deletions(-)
 create mode 100644 modules/nextflow/src/test/groovy/nextflow/extension/TopicOpTest.groovy

diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy
index 092afa7493..cc2028f05f 100644
--- a/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy
+++ b/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy
@@ -36,23 +36,15 @@ class IntoTopicOp {
 
     private String name
 
-    private List outputs
-
     IntoTopicOp( DataflowReadChannel source, String name ) {
         this.source = source
         this.name = name
     }
 
     DataflowWriteChannel apply() {
-        final target = CH.createBy(source)
-        final topicSource = CH.createTopicSource(name)
-        this.outputs = [target, topicSource]
-        newOperator([source], outputs, new ChainWithClosure(new CopyChannelsClosure()))
+        final target = CH.createTopicSource(name)
+        newOperator(source, target, new ChainWithClosure(new
CopyChannelsClosure())) return target } - List getOutputs() { - return outputs - } - } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy index 41a08ef1e3..df8498d763 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy @@ -1243,9 +1243,7 @@ class OperatorImpl { DataflowWriteChannel topic(DataflowReadChannel source, String name) { if( !NF.topicChannelEnabled ) throw new MissingMethodException('topic', OperatorImpl.class, InvokerHelper.EMPTY_ARGS) final op = new IntoTopicOp(source, name) - final target = op.apply() - OpCall.current.get().outputs.addAll( op.outputs ) - return target + return op.apply() } } diff --git a/modules/nextflow/src/test/groovy/nextflow/extension/TopicOpTest.groovy b/modules/nextflow/src/test/groovy/nextflow/extension/TopicOpTest.groovy new file mode 100644 index 0000000000..0f3b40780d --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/extension/TopicOpTest.groovy @@ -0,0 +1,50 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package nextflow.extension + +import nextflow.Channel +import test.Dsl2Spec +import test.MockScriptRunner + +/** + * + * @author Paolo Di Tommaso + */ +class TopicOpTest extends Dsl2Spec { + + def 'should define a process with output alias' () { + given: + def SCRIPT = ''' + nextflow.preview.topic = true + + Channel.of(1,2,3) | topic('foo') + + ''' + + when: + def runner = new MockScriptRunner() + def result = runner.setScript(SCRIPT).execute() + then: + result.getVal() == 1 + result.getVal() == 2 + result.getVal() == 3 + and: + result.getVal() == Channel.STOP + } + +} From 1e3d539e1e2d04b3b19fd4ddc956b7ba85c6d88a Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 27 Mar 2024 03:40:26 -0500 Subject: [PATCH 10/47] clean up e2e test Signed-off-by: Ben Sherman --- tests/publish-dsl.nf | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index 730b65a881..b54b6fe833 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -17,36 +17,36 @@ process align { - input: - val(x) + input: + val(x) - output: - path("*.bam") - path("${x}.bai") + output: + path("*.bam") + path("${x}.bai") - """ - echo ${x} > ${x}.bam - echo ${x} | rev > ${x}.bai - """ + """ + echo ${x} > ${x}.bam + echo ${x} | rev > ${x}.bai + """ } process my_combine { - input: - path(bamfile) - path(baifile) + input: + path(bamfile) + path(baifile) - output: - path 'result.txt' + output: + path 'result.txt' - """ - cat $bamfile > result.txt - cat $baifile >> result.txt - """ + """ + cat $bamfile > result.txt + cat $baifile >> result.txt + """ } process foo { output: - file 'xxx' + path 'xxx' ''' mkdir xxx From 925bc6749b7bc395e93cdb055be9073e5e52b9aa Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: 
Wed, 27 Mar 2024 03:41:50 -0500 Subject: [PATCH 11/47] Fix issue with topic operator Signed-off-by: Ben Sherman --- .../nextflow/extension/IntoTopicOp.groovy | 43 ++++++++++++++++--- .../nextflow/extension/OperatorImpl.groovy | 6 +-- ...icOpTest.groovy => IntoTopicOpTest.groovy} | 19 ++++---- tests/publish-dsl.nf | 6 +-- 4 files changed, 53 insertions(+), 21 deletions(-) rename modules/nextflow/src/test/groovy/nextflow/extension/{TopicOpTest.groovy => IntoTopicOpTest.groovy} (75%) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy index cc2028f05f..0c66baa0c5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy @@ -19,10 +19,15 @@ package nextflow.extension import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import groovyx.gpars.dataflow.DataflowReadChannel -import groovyx.gpars.dataflow.DataflowWriteChannel +import groovyx.gpars.dataflow.expression.DataflowExpression import groovyx.gpars.dataflow.operator.ChainWithClosure import groovyx.gpars.dataflow.operator.CopyChannelsClosure -import static nextflow.extension.DataflowHelper.newOperator +import groovyx.gpars.dataflow.operator.DataflowEventAdapter +import groovyx.gpars.dataflow.operator.DataflowProcessor +import nextflow.Channel +import nextflow.Global +import nextflow.Session +import nextflow.extension.DataflowHelper /** * Implements the {@link OperatorImpl#topic} operator * @@ -36,15 +41,43 @@ class IntoTopicOp { private String name + private Session session = Global.session as Session + IntoTopicOp( DataflowReadChannel source, String name ) { this.source = source this.name = name } - DataflowWriteChannel apply() { + void apply() { final target = CH.createTopicSource(name) - newOperator(source, target, new ChainWithClosure(new CopyChannelsClosure())) - return target + final listener = new DataflowEventAdapter() { + @Override + void afterRun(DataflowProcessor processor, List messages) { + if( source !instanceof DataflowExpression ) + return + // -- terminate the process + processor.terminate() + // -- send a poison pill if needed + if( target !instanceof DataflowExpression ) + target.bind(Channel.STOP) + else if( !(target as DataflowExpression).isBound() ) + target.bind(Channel.STOP) + } + + @Override + public boolean onException(final DataflowProcessor processor, final Throwable e) { + log.error("@unknown", e) + session.abort(e) + return true + } + } + + final params = [ + inputs: List.of(source), + outputs: List.of(target), + listeners: List.of(listener) + ] + DataflowHelper.newOperator(params, new ChainWithClosure(new CopyChannelsClosure())) } } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy index df8498d763..054a48e7a3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy @@ -1240,10 +1240,10 @@ class OperatorImpl { .getOutput() } - DataflowWriteChannel topic(DataflowReadChannel source, String name) { + DataflowReadChannel topic(DataflowReadChannel source, String name) { if( !NF.topicChannelEnabled ) throw new MissingMethodException('topic', OperatorImpl.class, InvokerHelper.EMPTY_ARGS) - final op = new IntoTopicOp(source, name) - return op.apply() + new IntoTopicOp(source, 
name).apply() + return source } } diff --git a/modules/nextflow/src/test/groovy/nextflow/extension/TopicOpTest.groovy b/modules/nextflow/src/test/groovy/nextflow/extension/IntoTopicOpTest.groovy similarity index 75% rename from modules/nextflow/src/test/groovy/nextflow/extension/TopicOpTest.groovy rename to modules/nextflow/src/test/groovy/nextflow/extension/IntoTopicOpTest.groovy index 0f3b40780d..8bc8f82a2f 100644 --- a/modules/nextflow/src/test/groovy/nextflow/extension/TopicOpTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/extension/IntoTopicOpTest.groovy @@ -25,26 +25,25 @@ import test.MockScriptRunner * * @author Paolo Di Tommaso */ -class TopicOpTest extends Dsl2Spec { +class IntoTopicOpTest extends Dsl2Spec { - def 'should define a process with output alias' () { + def 'should send a channel into a topic' () { given: def SCRIPT = ''' nextflow.preview.topic = true - Channel.of(1,2,3) | topic('foo') - + Channel.value(4) | topic('foo') + Channel.topic('foo').collect() ''' when: def runner = new MockScriptRunner() - def result = runner.setScript(SCRIPT).execute() + def result = runner.setScript(SCRIPT).execute().getVal() then: - result.getVal() == 1 - result.getVal() == 2 - result.getVal() == 3 - and: - result.getVal() == Channel.STOP + 1 in result + 2 in result + 3 in result + 4 in result } } diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index b54b6fe833..b17e6e3612 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - +nextflow.preview.topic = true process align { input: @@ -65,7 +65,7 @@ workflow { my_combine( bam, bai ) my_combine.out.view{ it.text } - foo() + foo | topic('foo') } output { @@ -73,7 +73,7 @@ output { select align.out[0], mode: 'copy' select align.out[1], mode: 'copy' select my_combine.out - select foo.out, mode: 'link' + topic 'foo', mode: 'link' } path('more/data') { From e4608b32c38af11813dbe7cf91ad0781cf626d63 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 27 Mar 2024 05:51:16 -0500 Subject: [PATCH 12/47] Apply suggestions from review Signed-off-by: Ben Sherman --- .../nextflow/ast/NextflowDSLImpl.groovy | 75 +++++++++++++++++++ .../groovy/nextflow/script/WorkflowDef.groovy | 19 ++++- tests/publish-dsl.nf | 9 ++- 3 files changed, 98 insertions(+), 5 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy index 7396a5ab16..481028cd8c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy @@ -172,11 +172,17 @@ class NextflowDSLImpl implements ASTTransformation { currentTaskName = null } } + else if( methodName == 'workflow' && preCondition ) { convertWorkflowDef(methodCall,sourceUnit) super.visitMethodCallExpression(methodCall) } + else if( methodName == 'output' && preCondition ) { + convertOutputDef(methodCall,sourceUnit) + super.visitMethodCallExpression(methodCall) + } + // just apply the default behavior else { super.visitMethodCallExpression(methodCall) @@ -488,6 +494,75 @@ class NextflowDSLImpl implements ASTTransformation { unit.addError( new SyntaxException(message,line,coln)) } + /** + * Apply syntax transformations to the output DSL + * + * @param methodCall + * @param unit + */ + protected void convertOutputDef(MethodCallExpression methodCall, SourceUnit unit) { + log.trace "Convert 
'output' ${methodCall.arguments}" + + assert methodCall.arguments instanceof ArgumentListExpression + final args = (ArgumentListExpression)methodCall.arguments + + if( args.size() != 1 || args[0] !instanceof ClosureExpression ) { + syntaxError(methodCall, "Invalid output definition") + return + } + + fixOutputPath( (ClosureExpression)args[0] ) + } + + /** + * Fix path declaration in output DSL: + * + * output { + * 'results' { ... } + * } + * + * becomes: + * + * output { + * path('results') { ... } + * } + * + * @param body + */ + protected void fixOutputPath(ClosureExpression body) { + final block = (BlockStatement)body.code + for( Statement stmt : block.statements ) { + if( stmt !instanceof ExpressionStatement ) + continue + + final stmtExpr = (ExpressionStatement)stmt + if( stmtExpr.expression !instanceof MethodCallExpression ) + continue + + final methodCall = (MethodCallExpression)stmtExpr.expression + if( methodCall.arguments !instanceof ArgumentListExpression ) + continue + + // HACK: detect implicit path() call as method call with single closure argument + // custom parser will be able to detect more elegantly + final args = (ArgumentListExpression)methodCall.arguments + if( args.size() != 1 || args[0] !instanceof ClosureExpression ) + continue + + final pathName = methodCall.getMethodAsString() + final pathBody = (ClosureExpression)args[0] + final pathCall = new MethodCallExpression( + new VariableExpression('this'), + 'path', + new ArgumentListExpression(constX(pathName), pathBody) + ) + stmtExpr.setExpression(pathCall) + + // recursively check nested path calls + fixOutputPath(pathBody) + } + } + /** * Transform a DSL `process` definition into a proper method invocation * diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy index 4c384f75c3..91ffe98f62 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy @@ -283,6 +283,8 @@ class WorkflowPublishDsl { private Binding binding + private Path directory = Path.of('.') + WorkflowPublishDsl(Binding binding) { this.binding = binding } @@ -297,8 +299,12 @@ class WorkflowPublishDsl { } } - void path(Map opts=[:], String path, Closure closure) { - final dsl = new PathDsl(Path.of(path), opts) + void directory(String directory) { + this.directory = Path.of(directory) + } + + void path(String path, Closure closure) { + final dsl = new PathDsl(directory.resolve(path), [:]) final cl = (Closure)closure.clone() cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.setDelegate(dsl) @@ -309,12 +315,20 @@ class WorkflowPublishDsl { private Path path private Map defaults + private boolean defaultsOnce = false PathDsl(Path path, Map defaults) { this.path = path this.defaults = defaults } + void defaults(Map opts) { + if( defaultsOnce ) + throw new ScriptRuntimeException("Publish defaults cannot be defined more than once for a given path") + defaultsOnce = true + defaults.putAll(opts) + } + void path(Map opts=[:], String subpath, Closure closure) { final dsl = new PathDsl(path.resolve(subpath), defaults + opts) final cl = (Closure)closure.clone() @@ -336,5 +350,6 @@ class WorkflowPublishDsl { void topic(Map opts=[:], String name) { select(opts, Channel.topic(name)) } + } } diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index b17e6e3612..3a5c655971 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -69,14 +69,17 @@ workflow { } output { - 
path('data') { + directory 'results' + + 'data' { select align.out[0], mode: 'copy' select align.out[1], mode: 'copy' select my_combine.out topic 'foo', mode: 'link' } - path('more/data') { - select my_combine.out, mode: 'copy' + 'data/more' { + defaults mode: 'copy' + select my_combine.out } } From 1e69f3ed92ca51178f49e70819e2131f4429369b Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 27 Mar 2024 08:59:23 -0500 Subject: [PATCH 13/47] Add output directory option to CLI, config, output DSL Signed-off-by: Ben Sherman --- .../nextflow/src/main/groovy/nextflow/Session.groovy | 8 ++++++++ .../src/main/groovy/nextflow/cli/CmdRun.groovy | 3 +++ .../main/groovy/nextflow/config/ConfigBuilder.groovy | 4 ++++ .../main/groovy/nextflow/script/WorkflowDef.groovy | 12 ++++++++++-- .../groovy/nextflow/script/WorkflowMetadata.groovy | 6 ++++++ 5 files changed, 31 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 5ca23559d7..53e9d2e348 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -119,6 +119,11 @@ class Session implements ISession { */ boolean resumeMode + /** + * The folder where pipeline results are published + */ + Path outputDir + /** * The folder where tasks temporary files are stored */ @@ -362,6 +367,9 @@ class Session implements ISession { // -- DAG object this.dag = new DAG() + // -- init output dir + this.outputDir = ((config.outputDir ?: '.') as Path).complete() + // -- init work dir this.workDir = ((config.workDir ?: 'work') as Path).complete() this.setLibDir( config.libDir as String ) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy index bca2166379..57661c552a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy @@ -107,6 +107,9 @@ class CmdRun extends CmdBase implements HubOptions { @Parameter(names=['-test'], description = 'Test a script function with the name specified') String test + @Parameter(names=['-o', '-output-dir'], description = 'Directory where workflow outputs are published') + String outputDir + @Parameter(names=['-w', '-work-dir'], description = 'Directory where intermediate result files are stored') String workDir diff --git a/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy b/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy index 362a25af1d..70e25434c4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy @@ -545,6 +545,10 @@ class ConfigBuilder { if( cmdRun.stubRun ) config.stubRun = cmdRun.stubRun + // -- set the output directory + if( cmdRun.outputDir ) + config.outputDir = cmdRun.outputDir + // -- sets the working directory if( cmdRun.workDir ) config.workDir = cmdRun.workDir diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy index 91ffe98f62..7014a1dd3b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy @@ -23,6 +23,8 @@ import groovy.transform.PackageScope import groovy.util.logging.Slf4j import groovyx.gpars.dataflow.DataflowWriteChannel import 
nextflow.Channel +import nextflow.Global +import nextflow.Session import nextflow.exception.MissingProcessException import nextflow.exception.MissingValueException import nextflow.exception.ScriptRuntimeException @@ -283,7 +285,9 @@ class WorkflowPublishDsl { private Binding binding - private Path directory = Path.of('.') + private Path directory = (Global.session as Session).outputDir + + private boolean directoryOnce = false WorkflowPublishDsl(Binding binding) { this.binding = binding @@ -300,7 +304,11 @@ class WorkflowPublishDsl { } void directory(String directory) { - this.directory = Path.of(directory) + if( directoryOnce ) + throw new ScriptRuntimeException("Output directory cannot be defined more than once in the workflow output definition") + directoryOnce = true + + this.directory = (directory as Path).complete() } void path(String path, Closure closure) { diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowMetadata.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowMetadata.groovy index 07394e5e55..1d7ce3e5da 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowMetadata.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowMetadata.groovy @@ -139,6 +139,11 @@ class WorkflowMetadata { */ Path launchDir + /** + * Workflow output directory + */ + Path outputDir + /** * Workflow working directory */ @@ -234,6 +239,7 @@ class WorkflowMetadata { this.container = session.fetchContainers() this.commandLine = session.commandLine this.nextflow = NextflowMeta.instance + this.outputDir = session.outputDir this.workDir = session.workDir this.launchDir = Paths.get('.').complete() this.profile = session.profile ?: ConfigBuilder.DEFAULT_PROFILE From 01b0570f1d7f49fcc12f431c1201b4d414670418 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 27 Mar 2024 09:04:56 -0500 Subject: [PATCH 14/47] Validate publish options Signed-off-by: Ben Sherman --- .../groovy/nextflow/script/WorkflowDef.groovy | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy index 7014a1dd3b..75058b56d3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy @@ -283,6 +283,18 @@ class WorkflowParamsResolver { @CompileStatic class WorkflowPublishDsl { + private static final List PUBLISH_OPTIONS = List.of( + 'contentType', + 'enabled', + 'failOnError', + 'mode', + 'overwrite', + 'pattern', + 'saveAs', + 'storageClass', + 'tags' + ) + private Binding binding private Path directory = (Global.session as Session).outputDir @@ -334,11 +346,13 @@ class WorkflowPublishDsl { if( defaultsOnce ) throw new ScriptRuntimeException("Publish defaults cannot be defined more than once for a given path") defaultsOnce = true + + validatePublishOptions(opts) defaults.putAll(opts) } - void path(Map opts=[:], String subpath, Closure closure) { - final dsl = new PathDsl(path.resolve(subpath), defaults + opts) + void path(String subpath, Closure closure) { + final dsl = new PathDsl(path.resolve(subpath), defaults) final cl = (Closure)closure.clone() cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.setDelegate(dsl) @@ -346,6 +360,7 @@ class WorkflowPublishDsl { } void select(Map opts=[:], DataflowWriteChannel source) { + validatePublishOptions(opts) new PublishOp(CH.getReadChannel(source), defaults + opts + [path: 
path]).apply() } @@ -359,5 +374,16 @@ class WorkflowPublishDsl { select(opts, Channel.topic(name)) } + private void validatePublishOptions(Map opts) { + for( final name : opts.keySet() ) { + if( name !in PUBLISH_OPTIONS ) { + final msg = name == 'path' + ? "Publish option 'path' is not allowed in the workflow output definition, use path definitions instead" + : "Unrecognized publish option '${name}' in the workflow output definition".toString() + throw new IllegalArgumentException(msg) + } + } + } + } } From c43afad159227751a4cf8f616b7b45dfb697d595 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 27 Mar 2024 10:10:22 -0500 Subject: [PATCH 15/47] Update docs Signed-off-by: Ben Sherman --- docs/channel.md | 2 +- docs/cli.md | 7 + docs/config.md | 5 + docs/metadata.md | 5 + docs/operator.md | 29 +++ docs/process.md | 4 + docs/snippets/topic.nf | 4 + docs/snippets/topic.out | 3 + docs/workflow.md | 389 ++++++++++++++++++++++++++++++---------- 9 files changed, 353 insertions(+), 95 deletions(-) create mode 100644 docs/snippets/topic.nf create mode 100644 docs/snippets/topic.out diff --git a/docs/channel.md b/docs/channel.md index 7d175c5fe1..5d615d1371 100644 --- a/docs/channel.md +++ b/docs/channel.md @@ -398,7 +398,7 @@ The `interval` method emits an incrementing index (starting from zero) at a peri Channel.interval('1s').view() ``` -The above snippet will emit 0, 1, 2, and so on, every second, forever. You can use an operator such as {ref}`operator-take`, {ref}`operator-timeout`, or {ref}`operator-until` to close the channel based on a stopping condition. +The above snippet will emit 0, 1, 2, and so on, every second, forever. You can use an operator such as {ref}`operator-take` or {ref}`operator-until` to close the channel based on a stopping condition. An optional closure can be used to transform the index. Additionally, returning `Channel.STOP` will close the channel. For example: diff --git a/docs/cli.md b/docs/cli.md index 3a70d64029..45829bc585 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -1109,6 +1109,8 @@ Checking nextflow-io/hello ... checkout-out at AnyObjectId[1c3e9e7404127514d69369cd87f8036830f5cf64] - revision: 1c3e9e7404 [v1.1] ``` +(cli-run)= + ### run Execute a pipeline. @@ -1191,6 +1193,11 @@ The `run` command is used to execute a local pipeline script or remote pipeline `-offline` : Do not check for remote project updates. +`-o, -output-dir` +: :::{versionadded} 24.04.0 + ::: +: Directory where workflow outputs are published. + `-params-file` : Load script parameters from a JSON/YAML file. diff --git a/docs/config.md b/docs/config.md index c46cfbd85a..f4597b8b9d 100644 --- a/docs/config.md +++ b/docs/config.md @@ -1704,6 +1704,11 @@ There are additional variables that can be defined within a configuration file t `dumpHashes` : If `true`, dump task hash keys in the log file, for debugging purposes. Equivalent to the `-dump-hashes` option of the `run` command. +`outputDir` +: :::{versionadded} 24.04.0 + ::: +: Defines the pipeline output directory. Equivalent to the `-output-dir` option of the `run` command. + `resume` : If `true`, enable the use of previously cached task executions. Equivalent to the `-resume` option of the `run` command. diff --git a/docs/metadata.md b/docs/metadata.md index 529060bb98..782d079f25 100644 --- a/docs/metadata.md +++ b/docs/metadata.md @@ -67,6 +67,11 @@ The following table lists the properties that can be accessed on the `workflow` `workflow.manifest` : Entries of the workflow manifest. 
+`workflow.outputDir` +: :::{versionadded} 24.04.0 + ::: +: Directory where workflow outputs are published. + `workflow.profile` : Used configuration profile. diff --git a/docs/operator.md b/docs/operator.md index 4e7f415010..02545a68ff 100644 --- a/docs/operator.md +++ b/docs/operator.md @@ -1466,6 +1466,8 @@ An optional {ref}`closure ` can be used to transform each item b :language: console ``` +(operator-take)= + ## take *Returns: queue channel* @@ -1591,6 +1593,31 @@ collect(flat: false, sort: true).ifEmpty([]) See also: [collect](#collect) +(operator-topic)= + +## topic + +:::{versionadded} 24.04.0 +::: + +:::{note} +This feature requires the `nextflow.preview.topic` feature flag to be enabled. +::: + +*Returns: the source channel* + +The `topic` operator sends each value from a source channel to a given {ref}`channel topic `. + +For example: + +```{literalinclude} snippets/topic.nf +:language: groovy +``` + +```{literalinclude} snippets/topic.out +:language: console +``` + ## transpose *Returns: queue channel* @@ -1669,6 +1696,8 @@ The difference between `unique` and `distinct` is that `unique` removes *all* du See also: [distinct](#distinct) +(operator-until)= + ## until *Returns: queue channel* diff --git a/docs/process.md b/docs/process.md index d619a713b3..1ab5c029f0 100644 --- a/docs/process.md +++ b/docs/process.md @@ -2163,6 +2163,10 @@ The following options are available: ### publishDir +:::{deprecated} 24.04.0 +The `publishDir` directive has been deprecated in favor of the new {ref}`workflow output definition `. +::: + The `publishDir` directive allows you to publish the process output files to a specified folder. For example: ```groovy diff --git a/docs/snippets/topic.nf b/docs/snippets/topic.nf new file mode 100644 index 0000000000..c55e63158c --- /dev/null +++ b/docs/snippets/topic.nf @@ -0,0 +1,4 @@ +nextflow.preview.topic = true + +Channel.of(1,2,3) | topic('foo') +Channel.topic('foo').view() \ No newline at end of file diff --git a/docs/snippets/topic.out b/docs/snippets/topic.out new file mode 100644 index 0000000000..5f5fbe759f --- /dev/null +++ b/docs/snippets/topic.out @@ -0,0 +1,3 @@ +1 +2 +3 \ No newline at end of file diff --git a/docs/workflow.md b/docs/workflow.md index 6bc45f1015..47c8b498e5 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -41,9 +41,115 @@ The `main:` label can be omitted if there are no `take:` or `emit:` blocks. Workflows were introduced in DSL2. If you are still using DSL1, see the {ref}`dsl1-page` page to learn how to migrate your Nextflow pipelines to DSL2. ::: +## Implicit workflow + +A script can define a single workflow without a name (also known as the *implicit workflow*), which is the default entrypoint of the script. The `-entry` command line option can be used to execute a different workflow as the entrypoint at runtime. + +:::{note} +Implicit workflow definitions are ignored when a script is included as a module. This way, a script can be written such that it can be either imported as a module or executed as a pipeline. +::: + +## Named workflows + +A named workflow is a "subworkflow" that can be invoked from other workflows. For example: + +```groovy +workflow my_pipeline { + foo() + bar( foo.out.collect() ) +} + +workflow { + my_pipeline() +} +``` + +The above snippet defines a workflow named `my_pipeline`, that can be invoked from another workflow as `my_pipeline()`, just like any other function or process. 
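
A named workflow can also be selected as the entrypoint at runtime with the `-entry` option described above. For example, assuming the above script were saved as `main.nf` (an illustrative file name):

```console
$ nextflow run main.nf -entry my_pipeline
```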
+ +## Using variables and params + +A workflow can access any variable or parameter defined in the global scope: + +```groovy +params.data = '/some/data/file' + +workflow { + if( params.data ) + bar(params.data) + else + bar(foo()) +} +``` + +:::{tip} +The use of global variables and params in subworkflows is discouraged because it breaks the modularity of the workflow. As a best practice, every workflow input should be explicitly defined as such in the `take:` block, and params should only be used in the implicit workflow. +::: + +## Workflow inputs (`take`) + +A workflow can declare one or more input channels using the `take` keyword. For example: + +```groovy +workflow my_pipeline { + take: + data1 + data2 + + main: + foo(data1, data2) + bar(foo.out) +} +``` + +:::{warning} +When the `take` keyword is used, the beginning of the workflow body must be defined with the `main` keyword. +::: + +Inputs can be specified like arguments when invoking the workflow: + +```groovy +workflow { + my_pipeline( channel.from('/some/data') ) +} +``` + +## Workflow outputs (`emit`) + +A workflow can declare one or more output channels using the `emit` keyword. For example: + +```groovy +workflow my_pipeline { + main: + foo(data) + bar(foo.out) + + emit: + bar.out +} +``` + +When invoking the workflow, the output channel(s) can be accessed using the `out` property, i.e. `my_pipeline.out`. When multiple output channels are declared, use the array bracket notation or the assignment syntax to access each output channel as described for [process outputs](#process-outputs). + +### Named outputs + +If an output channel is assigned to an identifier in the `emit` block, the identifier can be used to reference the channel from the calling workflow. For example: + +```groovy +workflow my_pipeline { + main: + foo(data) + bar(foo.out) + + emit: + my_data = bar.out +} +``` + +The result of the above workflow can be accessed using `my_pipeline.out.my_data`. + (workflow-process-invocation)= -## Process invocation +## Invoking processes A process can be invoked like a function in a workflow definition, passing the expected input channels like function arguments. For example: @@ -116,7 +222,7 @@ workflow { } ``` -### Process named outputs +#### Named outputs The `emit` option can be added to the process output definition to assign a name identifier. This name can be used to reference the channel from the calling workflow. For example: @@ -146,9 +252,10 @@ workflow { See {ref}`process outputs ` for more details. -### Process named stdout +#### Named stdout + +The `emit` option can also be used to name a `stdout` output. However, while process output options are usually prefixed with a comma, this is not the case for `stdout`. This is because `stdout` does not have an argument like other types. -The `emit` option can also be used to name a `stdout` output: ```groovy process sayHello { @@ -171,161 +278,255 @@ workflow { } ``` +## Invoking subworkflows + +Named workflows can be invoked and composed just like any other process or function. + +```groovy +workflow flow1 { + take: data + main: + foo(data) + bar(foo.out) + emit: + bar.out +} + +workflow flow2 { + take: data + main: + foo(data) + baz(foo.out) + emit: + baz.out +} + +workflow { + take: data + main: + flow1(data) + flow2(flow1.out) +} +``` + :::{note} -Optional params for a process input/output are always prefixed with a comma, except for `stdout`. Because `stdout` does not have an associated name or value like other types, the first param should not be prefixed. 
+Each workflow invocation has its own scope. As a result, the same process can be invoked in two different workflow scopes, like `foo` in the above snippet, which is used in both `flow1` and `flow2`. The workflow execution path, along with the process names, determines the *fully qualified process name* that is used to distinguish the different process invocations, i.e. `flow1:foo` and `flow2:foo` in the above example. +::: + +:::{tip} +The fully qualified process name can be used as a {ref}`process selector ` in a Nextflow configuration file, and it takes priority over the simple process name. ::: -## Subworkflows +(workflow-output-dsl)= -A named workflow is a "subworkflow" that can be invoked from other workflows. For example: +## Publishing outputs + +:::{versionadded} 24.04.0 +::: + +A script may define the set of outputs that should be published by the implicit workflow, known as the workflow output definition or "output block": ```groovy -workflow my_pipeline { - foo() - bar( foo.out.collect() ) +workflow { + foo(bar()) } -workflow { - my_pipeline() +output { + directory 'results' + + 'foo' { + select foo.out + } + + 'bar' { + defaults mode: 'copy', pattern: '*.txt' + select bar.out + } } ``` -The above snippet defines a workflow named `my_pipeline`, that can be invoked from another workflow as `my_pipeline()`, just like any other function or process. +The output block must be defined after the implicit workflow. -### Workflow parameters +### Output directory -A workflow component can access any variable or parameter defined in the global scope: +The `directory` statement is used to set the top-level output directory of the workflow: ```groovy -params.data = '/some/data/file' +output { + directory 'results' -workflow my_pipeline { - if( params.data ) - bar(params.data) - else - bar(foo()) + // ... } ``` -### Workflow inputs +It is optional, and it defaults to the launch directory (`workflow.launchDir`). -A workflow can declare one or more input channels using the `take` keyword. For example: +The output directory can also be defined using the `-output-dir` {ref}`command line option ` or the `outputDir` {ref}`config option `. + +### Path definitions + +Path definitions are used to definte the directory structure of the published outputs. A path definition is a path name followed by a block which defines the outputs to be published within that path. Like directories, path definitions can be nested. + +The path name defines a subdirectory within the output directory, or the parent path if the path definition is nested. + +For example, given the following output block: ```groovy -workflow my_pipeline { - take: data +output { + directory 'results' - main: - foo(data) - bar(foo.out) + 'foo' { + // ... + } + + 'bar' { + // ... + + 'baz' { + // ... + } + } } ``` -Multiple inputs must be specified on separate lines: +The following directory structure will be created by the workflow: + +``` +results/ +└── foo/ + └── ... +└── bar/ + └── baz/ + └── ... + └── ... +``` + +The path name may also contain multiple subdirectories separated by a slash `/`: ```groovy -workflow my_pipeline { - take: - data1 - data2 +output { + 'foo/bar/baz' { + // ... + } +} +``` - main: - foo(data1, data2) - bar(foo.out) +It is a shorthand for the following: + +```groovy +output { + 'foo' { + 'bar' { + 'baz' { + // ... + } + } + } } ``` -:::{warning} -When the `take` keyword is used, the beginning of the workflow body must be defined with the `main` keyword. 
-::: +### Channel selectors -Inputs can be specified like arguments when invoking the workflow: +The `select` statement is used to select channels to publish: ```groovy -workflow { - my_pipeline( channel.from('/some/data') ) +output { + 'foo' { + select foo.out + } } ``` -### Workflow outputs +Any channel defined in the implicit workflow can be referenced in a channel selector, including process and subworkflow outputs. -A workflow can declare one or more output channels using the `emit` keyword. For example: +:::{note} +A process/subworkflow output (e.g. `foo.out`) can only be selected directly if it contains a single output channel. Multi-channel outputs must be selected by index or name, e.g. `foo.out[0]` or `foo.out.samples`. +::: + +By default, all files emitted by the channel will be published into the specified directory. If a list value emitted by the channel contains any files, including files within nested lists, they will also be published. For example: ```groovy -workflow my_pipeline { - main: - foo(data) - bar(foo.out) +workflow { + ch_samples = Channel.of( + [ [id: 'sample1'], file('sample1.txt') ] + ) +} - emit: - bar.out +output { + 'samples' { + // sample1.txt will be published + select ch_samples + } } ``` -When invoking the workflow, the output channel(s) can be accessed using the `out` property, i.e. `my_pipeline.out`. When multiple output channels are declared, use the array bracket notation or the assignment syntax to access each output channel as described for [process outputs](#process-outputs). +The publishing behavior can be customized further by using [publish options](#publish-options). See that section for more details. -### Workflow named outputs +### Topic selectors -If an output channel is assigned to an identifier in the `emit` block, the identifier can be used to reference the channel from the calling workflow. For example: +:::{note} +This feature requires the `nextflow.preview.topic` feature flag to be enabled. +::: + +The `topic` statement can be used to select a channel topic for publishing: ```groovy -workflow my_pipeline { - main: - foo(data) - bar(foo.out) +output { + 'samples' { + topic 'samples' - emit: - my_data = bar.out + // equivalent to: + select Channel.topic('samples') + } } ``` -The result of the above workflow can be accessed using `my_pipeline.out.my_data`. +Topic selectors are a useful way to select channels which are deeply nested within subworkflows, without needing to propagate them to the top-level workflow. You can use the {ref}`operator-topic` operator or the `topic` option for {ref}`process outputs ` to send a channel to a given topic. -### Workflow entrypoint +Like a channel selector, a topic selector publishes every file that it receives by default, and it can specify [publish options](#publish-options). -A workflow with no name (also known as the *implicit workflow*) is the default entrypoint of the Nextflow pipeline. A different workflow entrypoint can be specified using the `-entry` command line option. +### Publish options -:::{note} -Implicit workflow definitions are ignored when a script is included as a module. This way, a workflow script can be written in such a way that it can be used either as a library module or an application script. -::: +The publishing behavior can be configured using the same options available in the {ref}`process-publishdir` directive. -### Workflow composition +There are two ways to define publish options: -Named workflows can be invoked and composed just like any other process or function. 
+- The `defaults` statement, which defines publish options for a path defintion -```groovy -workflow flow1 { - take: data - main: - foo(data) - bar(foo.out) - emit: - bar.out -} +- Channel and topic selectors -workflow flow2 { - take: data - main: - foo(data) - baz(foo.out) - emit: - baz.out -} +Publish options are resolved in a cascading manner, in which more specific settings take priority. -workflow { - take: data - main: - flow1(data) - flow2(flow1.out) +Consider the following example: + +```groovy +output { + 'samples' { + defaults mode: 'copy' + // ... + select ch_samples, pattern: '*.txt', mode: 'link' + + 'md5' { + defaults mode: 'link' + // ... + topic 'md5', mode: 'copy' + } + } } ``` -:::{note} -Each workflow invocation has its own scope. As a result, the same process can be invoked in two different workflow scopes, like `foo` in the above snippet, which is used in both `flow1` and `flow2`. The workflow execution path, along with the process names, determines the *fully qualified process name* that is used to distinguish the different process invocations, i.e. `flow1:foo` and `flow2:foo` in the above example. -::: +In this example, the following rules are applied: -:::{tip} -The fully qualified process name can be used as a {ref}`process selector ` in a Nextflow configuration file, and it takes priority over the simple process name. +- All files published to `samples` will be copied by default + +- The channel selector `select ch_samples` will publish via hard link, overriding the default from `samples`. Additionally, only files matching the pattern `*.txt` will be published. + +- All files published to `samples/md5` will be hard-linked by default, overriding the default from `samples`. + +- The topic selector `topic 'md5'` will publish via copy, overriding the default from `samples/md5`. + +:::{note} +The only option from `publishDir` that is not allowed is `path`, because the publish path is defined using path definitions. ::: ## Special operators From 570da2795ec14722b913783ebca2bbef524aafae Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 27 Mar 2024 12:27:11 -0500 Subject: [PATCH 16/47] Add defaults to directory statement Signed-off-by: Ben Sherman --- docs/workflow.md | 14 ++++++++------ .../main/groovy/nextflow/script/WorkflowDef.groovy | 7 +++++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/workflow.md b/docs/workflow.md index 47c8b498e5..43f297ce76 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -489,7 +489,9 @@ Like a channel selector, a topic selector publishes every file that it receives The publishing behavior can be configured using the same options available in the {ref}`process-publishdir` directive. -There are two ways to define publish options: +There are several ways to define publish options: + +- The `directory` statement - The `defaults` statement, which defines publish options for a path defintion @@ -501,9 +503,9 @@ Consider the following example: ```groovy output { + directory 'results', mode: 'copy' + 'samples' { - defaults mode: 'copy' - // ... select ch_samples, pattern: '*.txt', mode: 'link' 'md5' { @@ -517,11 +519,11 @@ output { In this example, the following rules are applied: -- All files published to `samples` will be copied by default +- All files will be copied by default -- The channel selector `select ch_samples` will publish via hard link, overriding the default from `samples`. Additionally, only files matching the pattern `*.txt` will be published. 
+- The channel selector `select ch_samples` will publish via hard link, overriding the output directory default. Additionally, only files matching the pattern `*.txt` will be published. -- All files published to `samples/md5` will be hard-linked by default, overriding the default from `samples`. +- All files published to `samples/md5` will be hard-linked by default, overriding the output directory default. - The topic selector `topic 'md5'` will publish via copy, overriding the default from `samples/md5`. diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy index 75058b56d3..3db319e493 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy @@ -299,6 +299,8 @@ class WorkflowPublishDsl { private Path directory = (Global.session as Session).outputDir + private Map defaults = [:] + private boolean directoryOnce = false WorkflowPublishDsl(Binding binding) { @@ -315,16 +317,17 @@ class WorkflowPublishDsl { } } - void directory(String directory) { + void directory(Map defaults=[:], String directory) { if( directoryOnce ) throw new ScriptRuntimeException("Output directory cannot be defined more than once in the workflow output definition") directoryOnce = true this.directory = (directory as Path).complete() + this.defaults = defaults } void path(String path, Closure closure) { - final dsl = new PathDsl(directory.resolve(path), [:]) + final dsl = new PathDsl(directory.resolve(path), defaults) final cl = (Closure)closure.clone() cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.setDelegate(dsl) From 0ad12eb5c165b22678e1e9c62afb3d0927d79efc Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 27 Mar 2024 19:17:33 -0500 Subject: [PATCH 17/47] Update docs Signed-off-by: Ben Sherman --- docs/channel.md | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/docs/channel.md b/docs/channel.md index 5d615d1371..9dfa9ca00c 100644 --- a/docs/channel.md +++ b/docs/channel.md @@ -467,17 +467,9 @@ See also: [channel.fromList](#fromlist) factory method. This feature requires the `nextflow.preview.topic` feature flag to be enabled. ::: -A *topic* is a channel type introduced as of Nextflow 23.11.0-edge along with {ref}`channel-type-value` and -{ref}`channel-type-queue`. +A *topic channel*, similar to a *queue channel*, is a non-blocking unidirectional FIFO queue, with the ability to implicitly receive values from multiple sources based on a *topic name*. -A *topic channel*, similarly to a *queue channel*, is non-blocking unidirectional FIFO queue, however it connects -multiple *producer* processes with multiple *consumer* processes or operators. - -:::{tip} -You can think about it as a channel that is shared across many different process using the same *topic name*. -::: - -A process output can be assigned to a topic using the `topic` option on an output, for example: +A process output can be sent to a topic using the `topic` option, for example: ```groovy process foo { @@ -491,15 +483,34 @@ process bar { } ``` -The `channel.topic` method allows referencing the topic channel with the specified name, which can be used as a process -input or operator composition as any other Nextflow channel: +See also: the `topic` option for {ref}`process outputs `. 
+ +Additionally, the `topic` operator can be used to send any channel to a topic: + +```groovy +ch_foo | topic('my_topic') +ch_bar | topic('my_topic') +``` + +Finally, the `Channel.topic()` factory can be used to consume the resulting channel for a given topic name, which can be used like any other channel: ```groovy channel.topic('my-topic').view() ``` -This approach is a convenient way to collect related items from many different sources without explicitly defining -the logic connecting many different queue channels altogether, commonly using the `mix` operator. +The same topic can be consumed using `Channel.topic()` any number of times, similar to referencing a channel multiple times. + +This approach is a convenient way to collect related items from many different sources without all of the logic that is required to connect them, e.g. using the `mix` operator. + +:::{warning} +Avoid creating a circular dependency within a topic, as it will cause the pipeline to run forever. For example: + +```groovy +Channel.topic('circular') + | /* ... */ + | topic('circular') +``` +::: :::{warning} Any process that consumes a channel topic should not send any outputs to that topic, or else the pipeline will hang forever. From 3e0823be0fd86429917c7a8f01a1e9d02941e7ad Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 30 Mar 2024 07:54:18 -0500 Subject: [PATCH 18/47] Apply suggestions from review Signed-off-by: Ben Sherman --- docs/channel.md | 32 +++-- docs/cli.md | 5 - docs/config.md | 5 - docs/metadata.md | 5 - docs/operator.md | 25 ---- docs/process.md | 3 +- docs/snippets/topic.nf | 4 - docs/snippets/topic.out | 3 - docs/workflow.md | 121 +++++++++++++----- .../src/main/groovy/nextflow/Session.groovy | 8 -- .../nextflow/ast/NextflowDSLImpl.groovy | 31 ++++- .../main/groovy/nextflow/cli/CmdRun.groovy | 3 - .../nextflow/config/ConfigBuilder.groovy | 4 - .../nextflow/extension/IntoTopicOp.groovy | 2 +- .../nextflow/extension/OperatorImpl.groovy | 7 - .../nextflow/extension/PublishOp.groovy | 12 +- .../groovy/nextflow/script/WorkflowDef.groovy | 110 ++++++++++------ .../nextflow/script/WorkflowMetadata.groovy | 6 - .../nextflow/extension/IntoTopicOpTest.groovy | 49 ------- tests/publish-dsl.nf | 15 ++- 20 files changed, 231 insertions(+), 219 deletions(-) delete mode 100644 docs/snippets/topic.nf delete mode 100644 docs/snippets/topic.out delete mode 100644 modules/nextflow/src/test/groovy/nextflow/extension/IntoTopicOpTest.groovy diff --git a/docs/channel.md b/docs/channel.md index 9dfa9ca00c..e366fea419 100644 --- a/docs/channel.md +++ b/docs/channel.md @@ -483,13 +483,21 @@ process bar { } ``` -See also: the `topic` option for {ref}`process outputs `. - -Additionally, the `topic` operator can be used to send any channel to a topic: +Additionally, the `topic:` section of a workflow definition can be used to send channels defined in a workflow to a topic: ```groovy -ch_foo | topic('my_topic') -ch_bar | topic('my_topic') +workflow foobar { + main: + foo() + bar() + + topic: + foo.out >> 'my_topic' + bar.out >> 'my_topic' + + emit: + bar.out +} ``` Finally, the `Channel.topic()` factory can be used to consume the resulting channel for a given topic name, which can be used like any other channel: @@ -503,20 +511,10 @@ The same topic can be consumed using `Channel.topic()` any number of times, simi This approach is a convenient way to collect related items from many different sources without all of the logic that is required to connect them, e.g. using the `mix` operator. 
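
As a rough sketch of the difference, the `mix`-based wiring requires every source channel to be visible at the collection point, whereas a topic only needs its name (`ch_foo` and `ch_bar` below are placeholder channels, not part of any real pipeline):

```groovy
// without a topic: both sources must be passed to the place where they are merged
ch_foo = Channel.of('alpha')
ch_bar = Channel.of('beta')

ch_foo
    .mix(ch_bar)
    .view()

// with a topic: each source is sent to the topic where it is produced
// (e.g. with the `topic` operator shown above), and the collection point
// only needs the topic name:
//   Channel.topic('my_topic').view()
```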
:::{warning} -Avoid creating a circular dependency within a topic, as it will cause the pipeline to run forever. For example: - -```groovy -Channel.topic('circular') - | /* ... */ - | topic('circular') -``` -::: - -:::{warning} -Any process that consumes a channel topic should not send any outputs to that topic, or else the pipeline will hang forever. +Avoid creating a circular dependency within a topic (e.g. a process that consumes a channel topic and sends outputs to that same topic), as it will cause the pipeline to run forever. ::: -See also: {ref}`process-additional-options` for process outputs. +See also: {ref}`process-additional-options` for process outputs and the {ref}`workflow topic section `. (channel-value)= diff --git a/docs/cli.md b/docs/cli.md index 45829bc585..609b5ef542 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -1193,11 +1193,6 @@ The `run` command is used to execute a local pipeline script or remote pipeline `-offline` : Do not check for remote project updates. -`-o, -output-dir` -: :::{versionadded} 24.04.0 - ::: -: Directory where workflow outputs are published. - `-params-file` : Load script parameters from a JSON/YAML file. diff --git a/docs/config.md b/docs/config.md index f4597b8b9d..c46cfbd85a 100644 --- a/docs/config.md +++ b/docs/config.md @@ -1704,11 +1704,6 @@ There are additional variables that can be defined within a configuration file t `dumpHashes` : If `true`, dump task hash keys in the log file, for debugging purposes. Equivalent to the `-dump-hashes` option of the `run` command. -`outputDir` -: :::{versionadded} 24.04.0 - ::: -: Defines the pipeline output directory. Equivalent to the `-output-dir` option of the `run` command. - `resume` : If `true`, enable the use of previously cached task executions. Equivalent to the `-resume` option of the `run` command. diff --git a/docs/metadata.md b/docs/metadata.md index 782d079f25..529060bb98 100644 --- a/docs/metadata.md +++ b/docs/metadata.md @@ -67,11 +67,6 @@ The following table lists the properties that can be accessed on the `workflow` `workflow.manifest` : Entries of the workflow manifest. -`workflow.outputDir` -: :::{versionadded} 24.04.0 - ::: -: Directory where workflow outputs are published. - `workflow.profile` : Used configuration profile. diff --git a/docs/operator.md b/docs/operator.md index 02545a68ff..56a8676366 100644 --- a/docs/operator.md +++ b/docs/operator.md @@ -1593,31 +1593,6 @@ collect(flat: false, sort: true).ifEmpty([]) See also: [collect](#collect) -(operator-topic)= - -## topic - -:::{versionadded} 24.04.0 -::: - -:::{note} -This feature requires the `nextflow.preview.topic` feature flag to be enabled. -::: - -*Returns: the source channel* - -The `topic` operator sends each value from a source channel to a given {ref}`channel topic `. - -For example: - -```{literalinclude} snippets/topic.nf -:language: groovy -``` - -```{literalinclude} snippets/topic.out -:language: console -``` - ## transpose *Returns: queue channel* diff --git a/docs/process.md b/docs/process.md index 1ab5c029f0..e05fc7decd 100644 --- a/docs/process.md +++ b/docs/process.md @@ -1008,7 +1008,7 @@ Some caveats on glob pattern behavior: Although the input files matching a glob output declaration are not included in the resulting output channel, these files may still be transferred from the task scratch directory to the original task work directory. Therefore, to avoid unnecessary file copies, avoid using loose wildcards when defining output files, e.g. `path '*'`. 
Instead, use a prefix or a suffix to restrict the set of matching files to only the expected ones, e.g. `path 'prefix_*.sorted.bam'`. ::: -Read more about glob syntax at the following link [What is a glob?][what is a glob?] +Read more about glob syntax at the following link [What is a glob?][glob] ### Dynamic output file names @@ -2672,4 +2672,3 @@ process foo { ``` [glob]: http://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob -[what is a glob?]: http://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob diff --git a/docs/snippets/topic.nf b/docs/snippets/topic.nf deleted file mode 100644 index c55e63158c..0000000000 --- a/docs/snippets/topic.nf +++ /dev/null @@ -1,4 +0,0 @@ -nextflow.preview.topic = true - -Channel.of(1,2,3) | topic('foo') -Channel.topic('foo').view() \ No newline at end of file diff --git a/docs/snippets/topic.out b/docs/snippets/topic.out deleted file mode 100644 index 5f5fbe759f..0000000000 --- a/docs/snippets/topic.out +++ /dev/null @@ -1,3 +0,0 @@ -1 -2 -3 \ No newline at end of file diff --git a/docs/workflow.md b/docs/workflow.md index 43f297ce76..bf493c09c7 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -51,7 +51,7 @@ Implicit workflow definitions are ignored when a script is included as a module. ## Named workflows -A named workflow is a "subworkflow" that can be invoked from other workflows. For example: +A named workflow is a workflow that can be invoked from other workflows. For example: ```groovy workflow my_pipeline { @@ -82,7 +82,7 @@ workflow { ``` :::{tip} -The use of global variables and params in subworkflows is discouraged because it breaks the modularity of the workflow. As a best practice, every workflow input should be explicitly defined as such in the `take:` block, and params should only be used in the implicit workflow. +The use of global variables and params in named workflows is discouraged because it breaks the modularity of the workflow. As a best practice, every workflow input should be explicitly defined as such in the `take:` block, and params should only be used in the implicit workflow. ::: ## Workflow inputs (`take`) @@ -147,6 +147,35 @@ workflow my_pipeline { The result of the above workflow can be accessed using `my_pipeline.out.my_data`. +(workflow-topics)= + +## Workflow topics (`topic`) + +:::{versionadded} 24.04.0 +::: + +:::{note} +This feature requires the `nextflow.preview.topic` feature flag to be enabled. +::: + +The `topic` section can be used to send channels defined in a workflow, including process and sub-workflow outputs, into a topic. For example: + +```groovy +workflow my_pipeline { + main: + foo(data) + bar(foo.out) + + topic: + foo.out >> 'foo' + + emit: + bar.out +} +``` + +In the above example, the channel `foo.out` (assumed to be a single channel) is sent to topic `foo`, without being emitted as a workflow output. + (workflow-process-invocation)= ## Invoking processes @@ -278,7 +307,7 @@ workflow { } ``` -## Invoking subworkflows +## Invoking workflows Named workflows can be invoked and composed just like any other process or function. @@ -361,11 +390,9 @@ output { It is optional, and it defaults to the launch directory (`workflow.launchDir`). -The output directory can also be defined using the `-output-dir` {ref}`command line option ` or the `outputDir` {ref}`config option `. - ### Path definitions -Path definitions are used to definte the directory structure of the published outputs. 
A path definition is a path name followed by a block which defines the outputs to be published within that path. Like directories, path definitions can be nested. +Path definitions are used to define the directory structure of the published outputs. A path definition is a path name followed by a block which defines the outputs to be published within that path. Like directories, path definitions can be nested. The path name defines a subdirectory within the output directory, or the parent path if the path definition is nested. @@ -425,22 +452,22 @@ output { } ``` -### Channel selectors +### Selecting channels -The `select` statement is used to select channels to publish: +The `from` statement is used to select channels to publish: ```groovy output { 'foo' { - select foo.out + from foo.out } } ``` -Any channel defined in the implicit workflow can be referenced in a channel selector, including process and subworkflow outputs. +Any channel defined in the implicit workflow can be selected, including process and workflow outputs. :::{note} -A process/subworkflow output (e.g. `foo.out`) can only be selected directly if it contains a single output channel. Multi-channel outputs must be selected by index or name, e.g. `foo.out[0]` or `foo.out.samples`. +A process/workflow output (e.g. `foo.out`) can only be selected directly if it contains a single output channel. Multi-channel outputs must be selected by index or name, e.g. `foo.out[0]` or `foo.out.samples`. ::: By default, all files emitted by the channel will be published into the specified directory. If a list value emitted by the channel contains any files, including files within nested lists, they will also be published. For example: @@ -455,47 +482,45 @@ workflow { output { 'samples' { // sample1.txt will be published - select ch_samples + from ch_samples } } ``` The publishing behavior can be customized further by using [publish options](#publish-options). See that section for more details. -### Topic selectors +### Selecting topics :::{note} This feature requires the `nextflow.preview.topic` feature flag to be enabled. ::: -The `topic` statement can be used to select a channel topic for publishing: +The `from` statement can also be used to select a topic by name: ```groovy output { 'samples' { - topic 'samples' + from 'samples' // equivalent to: - select Channel.topic('samples') + from Channel.topic('samples') } } ``` -Topic selectors are a useful way to select channels which are deeply nested within subworkflows, without needing to propagate them to the top-level workflow. You can use the {ref}`operator-topic` operator or the `topic` option for {ref}`process outputs ` to send a channel to a given topic. - -Like a channel selector, a topic selector publishes every file that it receives by default, and it can specify [publish options](#publish-options). +Topics are a useful way to publish channels which are deeply nested within workflows, without needing to propagate them to the top-level workflow. You can use the `topic:` workflow section, or the `topic` option for {ref}`process outputs `, to send a channel to a given topic. ### Publish options -The publishing behavior can be configured using the same options available in the {ref}`process-publishdir` directive. +The publishing behavior can be configured using a set of options similar to those for the {ref}`process-publishdir` directive. 
There are several ways to define publish options: - The `directory` statement -- The `defaults` statement, which defines publish options for a path defintion +- The `defaults` statement, which defines publish options for a path definition -- Channel and topic selectors +- The `from` statement, which defines publish options for an individual selector Publish options are resolved in a cascading manner, in which more specific settings take priority. @@ -506,12 +531,12 @@ output { directory 'results', mode: 'copy' 'samples' { - select ch_samples, pattern: '*.txt', mode: 'link' + from ch_samples, pattern: '*.txt', mode: 'link' 'md5' { defaults mode: 'link' // ... - topic 'md5', mode: 'copy' + from 'md5', mode: 'copy' } } } @@ -521,15 +546,53 @@ In this example, the following rules are applied: - All files will be copied by default -- The channel selector `select ch_samples` will publish via hard link, overriding the output directory default. Additionally, only files matching the pattern `*.txt` will be published. +- The channel selector `from ch_samples` will publish via hard link, overriding the output directory default. Additionally, only files matching the pattern `*.txt` will be published. - All files published to `samples/md5` will be hard-linked by default, overriding the output directory default. -- The topic selector `topic 'md5'` will publish via copy, overriding the default from `samples/md5`. +- The topic selector `from 'md5'` will publish via copy, overriding the default from `samples/md5`. -:::{note} -The only option from `publishDir` that is not allowed is `path`, because the publish path is defined using path definitions. -::: +Available options: + +`contentType` +: :::{versionadded} 22.10.0 + ::: +: *Experimental: currently only supported for S3.* +: Allow specifying the media content type of the published file a.k.a. [MIME type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_Types). If set to `true`, the content type is inferred from the file extension (default: `false`). + +`enabled` +: Enable or disable publishing (default: `true`). + +`ignoreErrors` +: When `true`, the pipeline will not fail if a file can't be published for any reason (default: `false`). + +`mode` +: The file publishing method. Can be one of the following values: + + - `'copy'`: Copies the output files into the publish directory. + - `'copyNoFollow'`: Copies the output files into the publish directory without following symlinks ie. copies the links themselves. + - `'link'`: Creates a hard link in the publish directory for each output file. + - `'move'`: Moves the output files into the publish directory. **Note**: this is only supposed to be used for a *terminal* process i.e. a process whose output is not consumed by any other downstream process. + - `'rellink'`: Creates a relative symbolic link in the publish directory for each output file. + - `'symlink'`: Creates an absolute symbolic link in the publish directory for each output file (default). + +`overwrite` +: When `true` any existing file in the specified folder will be overwritten (default: `false` if the task was cached on a resumed run, `true` otherwise). + +`pattern` +: Specifies a [glob][http://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob] file pattern that selects which files to publish from the source channel. + +`storageClass` +: :::{versionadded} 22.12.0-edge + ::: +: *Experimental: currently only supported for S3.* +: Allow specifying the storage class to be used for the published file. 
+ +`tags` +: :::{versionadded} 21.12.0-edge + ::: +: *Experimental: currently only supported for S3.* +: Allow the association of arbitrary tags with the published file e.g. `tags: [FOO: 'Hello world']`. ## Special operators diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 53e9d2e348..5ca23559d7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -119,11 +119,6 @@ class Session implements ISession { */ boolean resumeMode - /** - * The folder where pipeline results are published - */ - Path outputDir - /** * The folder where tasks temporary files are stored */ @@ -367,9 +362,6 @@ class Session implements ISession { // -- DAG object this.dag = new DAG() - // -- init output dir - this.outputDir = ((config.outputDir ?: '.') as Path).complete() - // -- init work dir this.workDir = ((config.workDir ?: 'work') as Path).complete() this.setLibDir( config.libDir as String ) diff --git a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy index 481028cd8c..27183b4e10 100644 --- a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy @@ -82,7 +82,8 @@ class NextflowDSLImpl implements ASTTransformation { final static private String WORKFLOW_TAKE = 'take' final static private String WORKFLOW_EMIT = 'emit' final static private String WORKFLOW_MAIN = 'main' - final static private List SCOPES = [WORKFLOW_TAKE, WORKFLOW_EMIT, WORKFLOW_MAIN] + final static private String WORKFLOW_TOPIC = 'topic' + final static private List SCOPES = [WORKFLOW_TAKE, WORKFLOW_EMIT, WORKFLOW_MAIN, WORKFLOW_TOPIC] final static public String PROCESS_WHEN = 'when' final static public String PROCESS_STUB = 'stub' @@ -429,6 +430,26 @@ class NextflowDSLImpl implements ASTTransformation { return result } + protected Statement normWorkflowTopic(ExpressionStatement stm) { + if( stm.expression !instanceof BinaryExpression ) { + syntaxError(stm, "Workflow malformed topic statement") + return stm + } + + final binaryX = (BinaryExpression)stm.expression + if( binaryX.operation.type != Types.RIGHT_SHIFT ) { + syntaxError(stm, "Workflow malformed topic statement") + return stm + } + + if( binaryX.rightExpression !instanceof ConstantExpression ) { + syntaxError(stm, "Workflow malformed topic statement") + return stm + } + + return stmt( callThisX('_into_topic', new ArgumentListExpression(binaryX.leftExpression, binaryX.rightExpression)) ) + } + protected Expression makeWorkflowDefWrapper( ClosureExpression closure, boolean anonymous ) { final codeBlock = (BlockStatement) closure.code @@ -468,6 +489,14 @@ class NextflowDSLImpl implements ASTTransformation { body.add(stm) break + case WORKFLOW_TOPIC: + if( !(stm instanceof ExpressionStatement) ) { + syntaxError(stm, "Workflow malformed topic statement") + break + } + body.add(normWorkflowTopic(stm as ExpressionStatement)) + break + default: if( context ) { def opts = SCOPES.closest(context) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy index 57661c552a..bca2166379 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy @@ -107,9 +107,6 @@ class CmdRun extends CmdBase implements 
HubOptions { @Parameter(names=['-test'], description = 'Test a script function with the name specified') String test - @Parameter(names=['-o', '-output-dir'], description = 'Directory where workflow outputs are published') - String outputDir - @Parameter(names=['-w', '-work-dir'], description = 'Directory where intermediate result files are stored') String workDir diff --git a/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy b/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy index 70e25434c4..362a25af1d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy @@ -545,10 +545,6 @@ class ConfigBuilder { if( cmdRun.stubRun ) config.stubRun = cmdRun.stubRun - // -- set the output directory - if( cmdRun.outputDir ) - config.outputDir = cmdRun.outputDir - // -- sets the working directory if( cmdRun.workDir ) config.workDir = cmdRun.workDir diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy index 0c66baa0c5..208332565f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy @@ -29,7 +29,7 @@ import nextflow.Global import nextflow.Session import nextflow.extension.DataflowHelper /** - * Implements the {@link OperatorImpl#topic} operator + * Operator that sends a channel to a topic * * @author Ben Sherman */ diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy index 054a48e7a3..63970a8c55 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/OperatorImpl.groovy @@ -43,7 +43,6 @@ import nextflow.splitter.FastaSplitter import nextflow.splitter.FastqSplitter import nextflow.splitter.JsonSplitter import nextflow.splitter.TextSplitter -import org.codehaus.groovy.runtime.InvokerHelper import org.codehaus.groovy.runtime.callsite.BooleanReturningMethodInvoker import org.codehaus.groovy.runtime.typehandling.DefaultTypeTransformation /** @@ -1240,10 +1239,4 @@ class OperatorImpl { .getOutput() } - DataflowReadChannel topic(DataflowReadChannel source, String name) { - if( !NF.topicChannelEnabled ) throw new MissingMethodException('topic', OperatorImpl.class, InvokerHelper.EMPTY_ARGS) - new IntoTopicOp(source, name).apply() - return source - } - } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index da8a1004e3..a6cbb6be10 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -40,6 +40,8 @@ class PublishOp { private Path sourceDir + private volatile boolean complete + private Session getSession() { Global.session as Session } PublishOp(DataflowReadChannel source, Map opts) { @@ -48,15 +50,18 @@ class PublishOp { this.publisher = PublishDir.create(this.opts) } + protected boolean getComplete() { complete } + PublishOp apply() { final events = new HashMap(2) events.onNext = this.&publish0 + events.onComplete = this.&done0 DataflowHelper.subscribeImpl(source, events) return this } protected void publish0(entry) { - log.debug "Publish operator got: $entry" + log.trace 
"Publish operator got: $entry" sourceDir = null // use a set to avoid duplicates final result = new HashSet(10) @@ -64,6 +69,11 @@ class PublishOp { publisher.apply(result, sourceDir) } + protected void done0(nope) { + log.trace "Publish operator complete" + this.complete = true + } + protected void collectFiles(entry, Collection result) { if( entry instanceof Path ) { result.add(entry) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy index 3db319e493..9ef91ae8fb 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy @@ -17,18 +17,18 @@ package nextflow.script import java.nio.file.Path +import java.nio.file.Paths import groovy.transform.CompileStatic import groovy.transform.PackageScope import groovy.util.logging.Slf4j import groovyx.gpars.dataflow.DataflowWriteChannel -import nextflow.Channel -import nextflow.Global -import nextflow.Session +import nextflow.NF import nextflow.exception.MissingProcessException import nextflow.exception.MissingValueException import nextflow.exception.ScriptRuntimeException import nextflow.extension.CH +import nextflow.extension.IntoTopicOp import nextflow.extension.PublishOp /** * Models a script workflow component @@ -65,7 +65,7 @@ class WorkflowDef extends BindableDef implements ChainableDef, IterableDef, Exec this.name = name // invoke the body resolving in/out params final copy = (Closure)rawBody.clone() - final resolver = new WorkflowParamsResolver() + final resolver = new WorkflowParamsDsl() copy.setResolveStrategy(Closure.DELEGATE_FIRST) copy.setDelegate(resolver) this.body = copy.call() @@ -211,17 +211,16 @@ class WorkflowDef extends BindableDef implements ChainableDef, IterableDef, Exec collectInputs(binding, args) // invoke the workflow execution final closure = body.closure - closure.delegate = binding + closure.setDelegate(new WorkflowDsl(binding)) closure.setResolveStrategy(Closure.DELEGATE_FIRST) closure.call() // collect the workflow outputs output = collectOutputs(declaredOutputs) // publish the workflow outputs if( publisher ) { - final dsl = new WorkflowPublishDsl(binding) final cl = (Closure)publisher.clone() + cl.setDelegate(new WorkflowPublishDsl(binding)) cl.setResolveStrategy(Closure.DELEGATE_FIRST) - cl.setDelegate(dsl) cl.call() } return output @@ -230,11 +229,11 @@ class WorkflowDef extends BindableDef implements ChainableDef, IterableDef, Exec } /** - * Hold workflow parameters + * Implements the DSL for defining workflow takes and emits */ @Slf4j @CompileStatic -class WorkflowParamsResolver { +class WorkflowParamsDsl { static final private String TAKE_PREFIX = '_take_' static final private String EMIT_PREFIX = '_emit_' @@ -254,25 +253,62 @@ class WorkflowParamsResolver { else throw new MissingMethodException(name, WorkflowDef, args) } +} - private Map argsToMap(Object args) { - if( args && args.getClass().isArray() ) { - if( ((Object[])args)[0] instanceof Map ) { - def map = (Map)((Object[])args)[0] - return new HashMap(map) - } +/** + * Implements the DSL for executing the workflow + * + * @author Ben Sherman + */ +@Slf4j +@CompileStatic +class WorkflowDsl { + + private Binding binding + + WorkflowDsl(Binding binding) { + this.binding = binding + } + + @Override + Object getProperty(String name) { + try { + return binding.getProperty(name) + } + catch( MissingPropertyException e ){ + return super.getProperty(name) } - 
Collections.emptyMap() } - private Map argToPublishOpts(Object args) { - final opts = argsToMap(args) - if( opts.containsKey('saveAs')) { - log.warn "Workflow publish does not support `saveAs` option" - opts.remove('saveAs') + @Override + Object invokeMethod(String name, Object args) { + if( name == '_into_topic' ) { + final args0 = args as Object[] + if( args0[0] instanceof DataflowWriteChannel ) + _into_topic(args0[0] as DataflowWriteChannel, args0[1] as String) + else if( args0[0] instanceof ChannelOut ) + _into_topic(args0[0] as ChannelOut, args0[1] as String) + else + throw new IllegalArgumentException("Workflow topic source should be a channel") } - return opts + else + binding.invokeMethod(name, args) } + + void _into_topic(DataflowWriteChannel source, String name) { + if( !NF.topicChannelEnabled ) + throw new ScriptRuntimeException("Workflow `topic:` section requires the `nextflow.preview.topic` feature flag") + new IntoTopicOp(CH.getReadChannel(source), name).apply() + } + + void _into_topic(ChannelOut out, String name) { + if( !NF.topicChannelEnabled ) + throw new ScriptRuntimeException("Workflow `topic:` section requires the `nextflow.preview.topic` feature flag") + if( out.size() != 1 ) + throw new IllegalArgumentException("Cannot send a multi-channel output into a topic") + _into_topic(out[0], name) + } + } /** @@ -286,18 +322,17 @@ class WorkflowPublishDsl { private static final List PUBLISH_OPTIONS = List.of( 'contentType', 'enabled', - 'failOnError', + 'ignoreErrors', 'mode', 'overwrite', 'pattern', - 'saveAs', 'storageClass', 'tags' ) private Binding binding - private Path directory = (Global.session as Session).outputDir + private Path directory = Paths.get('.').complete() private Map defaults = [:] @@ -362,31 +397,30 @@ class WorkflowPublishDsl { cl.call() } - void select(Map opts=[:], DataflowWriteChannel source) { + void from(Map opts=[:], DataflowWriteChannel source) { validatePublishOptions(opts) + if( opts.ignoreErrors ) + opts.failOnError = !opts.remove('ignoreErrors') new PublishOp(CH.getReadChannel(source), defaults + opts + [path: path]).apply() } - void select(Map opts=[:], ChannelOut out) { + void from(Map opts=[:], ChannelOut out) { if( out.size() != 1 ) throw new IllegalArgumentException("Cannot publish a multi-channel output") - select(opts, out[0]) + from(opts, out[0]) } - void topic(Map opts=[:], String name) { - select(opts, Channel.topic(name)) + void from(Map opts=[:], String name) { + if( !NF.topicChannelEnabled ) throw new ScriptRuntimeException("Topic selector in workflow output definition requires the `nextflow.preview.topic` feature flag") + from(opts, CH.topic(name)) } private void validatePublishOptions(Map opts) { - for( final name : opts.keySet() ) { - if( name !in PUBLISH_OPTIONS ) { - final msg = name == 'path' - ? 
"Publish option 'path' is not allowed in the workflow output definition, use path definitions instead" - : "Unrecognized publish option '${name}' in the workflow output definition".toString() - throw new IllegalArgumentException(msg) - } - } + for( final name : opts.keySet() ) + if( name !in PUBLISH_OPTIONS ) + throw new IllegalArgumentException("Unrecognized publish option '${name}' in the workflow output definition") } } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowMetadata.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowMetadata.groovy index 1d7ce3e5da..07394e5e55 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowMetadata.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowMetadata.groovy @@ -139,11 +139,6 @@ class WorkflowMetadata { */ Path launchDir - /** - * Workflow output directory - */ - Path outputDir - /** * Workflow working directory */ @@ -239,7 +234,6 @@ class WorkflowMetadata { this.container = session.fetchContainers() this.commandLine = session.commandLine this.nextflow = NextflowMeta.instance - this.outputDir = session.outputDir this.workDir = session.workDir this.launchDir = Paths.get('.').complete() this.profile = session.profile ?: ConfigBuilder.DEFAULT_PROFILE diff --git a/modules/nextflow/src/test/groovy/nextflow/extension/IntoTopicOpTest.groovy b/modules/nextflow/src/test/groovy/nextflow/extension/IntoTopicOpTest.groovy deleted file mode 100644 index 8bc8f82a2f..0000000000 --- a/modules/nextflow/src/test/groovy/nextflow/extension/IntoTopicOpTest.groovy +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2013-2024, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -package nextflow.extension - -import nextflow.Channel -import test.Dsl2Spec -import test.MockScriptRunner - -/** - * - * @author Paolo Di Tommaso - */ -class IntoTopicOpTest extends Dsl2Spec { - - def 'should send a channel into a topic' () { - given: - def SCRIPT = ''' - nextflow.preview.topic = true - Channel.of(1,2,3) | topic('foo') - Channel.value(4) | topic('foo') - Channel.topic('foo').collect() - ''' - - when: - def runner = new MockScriptRunner() - def result = runner.setScript(SCRIPT).execute().getVal() - then: - 1 in result - 2 in result - 3 in result - 4 in result - } - -} diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index 3a5c655971..61ee4bbb5e 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -65,21 +65,24 @@ workflow { my_combine( bam, bai ) my_combine.out.view{ it.text } - foo | topic('foo') + foo() + + topic: + foo.out >> 'foo' } output { directory 'results' 'data' { - select align.out[0], mode: 'copy' - select align.out[1], mode: 'copy' - select my_combine.out - topic 'foo', mode: 'link' + from align.out[0], mode: 'copy' + from align.out[1], mode: 'copy' + from my_combine.out + from 'foo', mode: 'link' } 'data/more' { defaults mode: 'copy' - select my_combine.out + from my_combine.out } } From 5a1a7bbcccb4a9157fb362eca758091ba455c651 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 30 Mar 2024 09:38:28 -0500 Subject: [PATCH 19/47] Fix workflow binding Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/script/WorkflowDef.groovy | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy index 9ef91ae8fb..000488f84e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy @@ -280,6 +280,11 @@ class WorkflowDsl { } } + @Override + void setProperty(String name, Object value) { + binding.setProperty(name, value) + } + @Override Object invokeMethod(String name, Object args) { if( name == '_into_topic' ) { From 3bdd0febec741baba3e22ed793c530eb00a24b70 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 6 Apr 2024 15:27:13 -0500 Subject: [PATCH 20/47] Fix dynamic path name Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy index 27183b4e10..faed9f5638 100644 --- a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy @@ -578,12 +578,12 @@ class NextflowDSLImpl implements ASTTransformation { if( args.size() != 1 || args[0] !instanceof ClosureExpression ) continue - final pathName = methodCall.getMethodAsString() + final pathName = methodCall.getMethod() final pathBody = (ClosureExpression)args[0] final pathCall = new MethodCallExpression( new VariableExpression('this'), 'path', - new ArgumentListExpression(constX(pathName), pathBody) + new ArgumentListExpression(pathName, pathBody) ) stmtExpr.setExpression(pathCall) From 59d83c03079c07419f4fe213ae78ba35a3db45ae Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 10 Apr 2024 08:49:45 -0500 Subject: [PATCH 21/47] Apply suggestions from review Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/Session.groovy | 3 + 
.../nextflow/ast/NextflowDSLImpl.groovy | 100 ++++----- .../nextflow/extension/IntoTopicOp.groovy | 83 -------- .../nextflow/extension/PublishOp.groovy | 2 +- .../groovy/nextflow/script/BaseScript.groovy | 13 +- .../nextflow/script/BaseScriptConsts.groovy | 2 +- .../groovy/nextflow/script/OutputDef.groovy | 169 +++++++++++++++ .../nextflow/script/WorkflowBinding.groovy | 13 ++ .../groovy/nextflow/script/WorkflowDef.groovy | 196 +----------------- tests/publish-dsl.nf | 19 +- 10 files changed, 248 insertions(+), 352 deletions(-) delete mode 100644 modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 5ca23559d7..5ab61e41c7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -32,6 +32,7 @@ import groovy.transform.Memoized import groovy.transform.PackageScope import groovy.util.logging.Slf4j import groovyx.gpars.GParsConfig +import groovyx.gpars.dataflow.DataflowWriteChannel import groovyx.gpars.dataflow.operator.DataflowProcessor import nextflow.cache.CacheDB import nextflow.cache.CacheFactory @@ -94,6 +95,8 @@ class Session implements ISession { final List igniters = new ArrayList<>(20) + final Map publishRules = [:] + /** * Creates process executors */ diff --git a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy index faed9f5638..92232e32e3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy @@ -82,8 +82,8 @@ class NextflowDSLImpl implements ASTTransformation { final static private String WORKFLOW_TAKE = 'take' final static private String WORKFLOW_EMIT = 'emit' final static private String WORKFLOW_MAIN = 'main' - final static private String WORKFLOW_TOPIC = 'topic' - final static private List SCOPES = [WORKFLOW_TAKE, WORKFLOW_EMIT, WORKFLOW_MAIN, WORKFLOW_TOPIC] + final static private String WORKFLOW_PUBLISH = 'publish' + final static private List SCOPES = [WORKFLOW_TAKE, WORKFLOW_EMIT, WORKFLOW_MAIN, WORKFLOW_PUBLISH] final static public String PROCESS_WHEN = 'when' final static public String PROCESS_STUB = 'stub' @@ -430,24 +430,19 @@ class NextflowDSLImpl implements ASTTransformation { return result } - protected Statement normWorkflowTopic(ExpressionStatement stm) { + protected Statement normWorkflowPublish(ExpressionStatement stm) { if( stm.expression !instanceof BinaryExpression ) { - syntaxError(stm, "Workflow malformed topic statement") + syntaxError(stm, "Workflow malformed publish statement") return stm } final binaryX = (BinaryExpression)stm.expression if( binaryX.operation.type != Types.RIGHT_SHIFT ) { - syntaxError(stm, "Workflow malformed topic statement") + syntaxError(stm, "Workflow malformed publish statement") return stm } - if( binaryX.rightExpression !instanceof ConstantExpression ) { - syntaxError(stm, "Workflow malformed topic statement") - return stm - } - - return stmt( callThisX('_into_topic', new ArgumentListExpression(binaryX.leftExpression, binaryX.rightExpression)) ) + return stmt( callThisX('_into_publish', args(binaryX.leftExpression, binaryX.rightExpression)) ) } protected Expression makeWorkflowDefWrapper( ClosureExpression closure, boolean 
anonymous ) { @@ -489,12 +484,12 @@ class NextflowDSLImpl implements ASTTransformation { body.add(stm) break - case WORKFLOW_TOPIC: + case WORKFLOW_PUBLISH: if( !(stm instanceof ExpressionStatement) ) { - syntaxError(stm, "Workflow malformed topic statement") + syntaxError(stm, "Workflow malformed publish statement") break } - body.add(normWorkflowTopic(stm as ExpressionStatement)) + body.add(normWorkflowPublish(stm as ExpressionStatement)) break default: @@ -524,7 +519,17 @@ class NextflowDSLImpl implements ASTTransformation { } /** - * Apply syntax transformations to the output DSL + * Transform rules in the workflow output definition: + * + * output { + * 'foo' { ... } + * } + * + * becomes: + * + * output { + * rule('foo') { ... } + * } * * @param methodCall * @param unit @@ -533,62 +538,39 @@ class NextflowDSLImpl implements ASTTransformation { log.trace "Convert 'output' ${methodCall.arguments}" assert methodCall.arguments instanceof ArgumentListExpression - final args = (ArgumentListExpression)methodCall.arguments + final arguments = (ArgumentListExpression)methodCall.arguments - if( args.size() != 1 || args[0] !instanceof ClosureExpression ) { + if( arguments.size() != 1 || arguments[0] !instanceof ClosureExpression ) { syntaxError(methodCall, "Invalid output definition") - return + return } - fixOutputPath( (ClosureExpression)args[0] ) - } - - /** - * Fix path declaration in output DSL: - * - * output { - * 'results' { ... } - * } - * - * becomes: - * - * output { - * path('results') { ... } - * } - * - * @param body - */ - protected void fixOutputPath(ClosureExpression body) { - final block = (BlockStatement)body.code + final closure = (ClosureExpression)arguments[0] + final block = (BlockStatement)closure.code for( Statement stmt : block.statements ) { - if( stmt !instanceof ExpressionStatement ) - continue + if( stmt !instanceof ExpressionStatement ) { + syntaxError(stmt, "Invalid output rule definition") + return + } final stmtExpr = (ExpressionStatement)stmt - if( stmtExpr.expression !instanceof MethodCallExpression ) - continue + if( stmtExpr.expression !instanceof MethodCallExpression ) { + syntaxError(stmt, "Invalid output rule definition") + return + } - final methodCall = (MethodCallExpression)stmtExpr.expression - if( methodCall.arguments !instanceof ArgumentListExpression ) - continue + final call = (MethodCallExpression)stmtExpr.expression + assert call.arguments instanceof ArgumentListExpression - // HACK: detect implicit path() call as method call with single closure argument + // HACK: rule definition is a method call with single closure argument // custom parser will be able to detect more elegantly - final args = (ArgumentListExpression)methodCall.arguments - if( args.size() != 1 || args[0] !instanceof ClosureExpression ) + final ruleArgs = (ArgumentListExpression)call.arguments + if( ruleArgs.size() != 1 || ruleArgs[0] !instanceof ClosureExpression ) continue - final pathName = methodCall.getMethod() - final pathBody = (ClosureExpression)args[0] - final pathCall = new MethodCallExpression( - new VariableExpression('this'), - 'path', - new ArgumentListExpression(pathName, pathBody) - ) - stmtExpr.setExpression(pathCall) - - // recursively check nested path calls - fixOutputPath(pathBody) + final ruleName = call.method + final ruleBody = (ClosureExpression)ruleArgs[0] + stmtExpr.expression = callThisX('rule', args(ruleName, ruleBody)) } } diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy 
b/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy deleted file mode 100644 index 208332565f..0000000000 --- a/modules/nextflow/src/main/groovy/nextflow/extension/IntoTopicOp.groovy +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright 2013-2024, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package nextflow.extension - -import groovy.transform.CompileStatic -import groovy.util.logging.Slf4j -import groovyx.gpars.dataflow.DataflowReadChannel -import groovyx.gpars.dataflow.expression.DataflowExpression -import groovyx.gpars.dataflow.operator.ChainWithClosure -import groovyx.gpars.dataflow.operator.CopyChannelsClosure -import groovyx.gpars.dataflow.operator.DataflowEventAdapter -import groovyx.gpars.dataflow.operator.DataflowProcessor -import nextflow.Channel -import nextflow.Global -import nextflow.Session -import nextflow.extension.DataflowHelper -/** - * Operator that sends a channel to a topic - * - * @author Ben Sherman - */ -@Slf4j -@CompileStatic -class IntoTopicOp { - - private DataflowReadChannel source - - private String name - - private Session session = Global.session as Session - - IntoTopicOp( DataflowReadChannel source, String name ) { - this.source = source - this.name = name - } - - void apply() { - final target = CH.createTopicSource(name) - final listener = new DataflowEventAdapter() { - @Override - void afterRun(DataflowProcessor processor, List messages) { - if( source !instanceof DataflowExpression ) - return - // -- terminate the process - processor.terminate() - // -- send a poison pill if needed - if( target !instanceof DataflowExpression ) - target.bind(Channel.STOP) - else if( !(target as DataflowExpression).isBound() ) - target.bind(Channel.STOP) - } - - @Override - public boolean onException(final DataflowProcessor processor, final Throwable e) { - log.error("@unknown", e) - session.abort(e) - return true - } - } - - final params = [ - inputs: List.of(source), - outputs: List.of(target), - listeners: List.of(listener) - ] - DataflowHelper.newOperator(params, new ChainWithClosure(new CopyChannelsClosure())) - } - -} diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index a6cbb6be10..b70758bd33 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -61,7 +61,7 @@ class PublishOp { } protected void publish0(entry) { - log.trace "Publish operator got: $entry" + log.trace "Publish operator received: $entry" sourceDir = null // use a set to avoid duplicates final result = new HashSet(10) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy index c206554a0d..9f46abe314 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy @@ 
-41,6 +41,8 @@ abstract class BaseScript extends Script implements ExecutionContext { private WorkflowDef entryFlow + private OutputDef publisher + @Lazy InputStream stdin = { System.in }() BaseScript() { @@ -57,6 +59,10 @@ abstract class BaseScript extends Script implements ExecutionContext { (ScriptBinding)super.getBinding() } + Session getSession() { + session + } + /** * Holds the configuration object which will used to execution the user tasks */ @@ -118,11 +124,11 @@ abstract class BaseScript extends Script implements ExecutionContext { protected output(Closure closure) { if( !entryFlow ) - throw new IllegalStateException("Publish definition must be defined after the anonymous workflow") + throw new IllegalStateException("Workflow output definition must be defined after the anonymous workflow") if( ExecutionStack.withinWorkflow() ) - throw new IllegalStateException("Publish definition is not allowed within a workflow") + throw new IllegalStateException("Workflow output definition is not allowed within a workflow") - entryFlow.publisher = closure + publisher = new OutputDef(closure) } protected IncludeDef include( IncludeDef include ) { @@ -187,6 +193,7 @@ abstract class BaseScript extends Script implements ExecutionContext { // invoke the entry workflow session.notifyBeforeWorkflowExecution() final ret = entryFlow.invoke_a(BaseScriptConsts.EMPTY_ARGS) + publisher.run(session.publishRules) session.notifyAfterWorkflowExecution() return ret } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/BaseScriptConsts.groovy b/modules/nextflow/src/main/groovy/nextflow/script/BaseScriptConsts.groovy index 3de58346dc..6e69964e9f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/BaseScriptConsts.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/BaseScriptConsts.groovy @@ -25,5 +25,5 @@ class BaseScriptConsts { public static Object[] EMPTY_ARGS = [] as Object[] - public static List PRIVATE_NAMES = ['session','processFactory','taskProcessor','meta','entryFlow'] + public static List PRIVATE_NAMES = ['session','processFactory','taskProcessor','meta','entryFlow', 'publisher'] } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy new file mode 100644 index 0000000000..9b056e7c1a --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy @@ -0,0 +1,169 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.script + +import java.nio.file.Path +import java.nio.file.Paths + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import groovyx.gpars.dataflow.DataflowWriteChannel +import nextflow.exception.ScriptRuntimeException +import nextflow.extension.CH +import nextflow.extension.PublishOp +/** + * Models a workflow output definition + * + * @author Ben Sherman + */ +@Slf4j +@CompileStatic +class OutputDef { + + private Closure closure + + OutputDef(Closure closure) { + this.closure = closure + } + + void run(Map sources) { + final dsl = new OutputDsl() + final cl = (Closure)closure.clone() + cl.setDelegate(dsl) + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.call() + + dsl.build(sources) + } + +} + +/** + * Implements the DSL for publishing workflow outputs + * + * @author Ben Sherman + */ +@Slf4j +@CompileStatic +class OutputDsl { + + private Map rules = [:] + + private Path directory + + private Map defaults = [:] + + void directory(String directory) { + if( this.directory ) + throw new ScriptRuntimeException("Output directory cannot be defined more than once in the workflow outputs") + this.directory = (directory as Path).complete() + } + + // TODO: other publish options + // - contentType + // - ignoreErrors + // - overwrite + // - storageClass + // - tags + + void mode(String mode) { + setDefault('mode', mode) + } + + private void setDefault(String name, Object value) { + if( defaults.containsKey(name) ) + throw new ScriptRuntimeException("Default `${name}` option cannot be defined more than once in the workflow outputs") + defaults[name] = value + } + + void rule(String name, Closure closure) { + if( rules.containsKey(name) ) + throw new ScriptRuntimeException("Publish rule '${name}' is defined more than once in the workflow outputs") + + final dsl = new RuleDsl() + final cl = (Closure)closure.clone() + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.setDelegate(dsl) + cl.call() + + rules[name] = dsl.getOptions() + } + + void build(Map sources) { + for( final entry : sources ) { + final source = entry.key + final name = entry.value + final opts = publishOptions(name, rules[name] ?: [:]) + + new PublishOp(CH.getReadChannel(source), opts).apply() + } + } + + private Map publishOptions(String name, Map overrides) { + if( !directory ) + directory = Paths.get('.').complete() + + final opts = defaults + overrides + if( opts.containsKey('ignoreErrors') ) + opts.failOnError = !opts.remove('ignoreErrors') + opts.path = directory.resolve(opts.path as String ?: name) + return opts + } + + static class RuleDsl { + + // TODO: + // - contentType + // - pattern (?) 
+ // - storageClass + // - tags + + private Map opts = [:] + + void enabled(boolean enabled) { + setOption('enabled', enabled) + } + + void ignoreErrors(boolean ignoreErrors) { + setOption('ignoreErrors', ignoreErrors) + } + + void mode(String mode) { + setOption('mode', mode) + } + + void overwrite(boolean overwrite) { + setOption('overwrite', overwrite) + } + + void path(String path) { + setOption('path', path) + } + + private void setOption(String name, Object value) { + if( opts.containsKey(name) ) + throw new ScriptRuntimeException("Publish option `${name}` cannot be defined more than once in a given rule") + opts[name] = value + } + + Map getOptions() { + opts + } + + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy index 9b45ee6c59..bc0d2b3035 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy @@ -19,6 +19,7 @@ package nextflow.script import groovy.transform.CompileStatic import groovy.transform.PackageScope import groovy.util.logging.Slf4j +import groovyx.gpars.dataflow.DataflowWriteChannel import nextflow.NF import nextflow.exception.IllegalInvocationException import nextflow.extension.OpCall @@ -155,4 +156,16 @@ class WorkflowBinding extends Binding { } } + void _into_publish(DataflowWriteChannel source, String name) { + // TODO: add rules to "default" mapping for component + // then add to workflow publisher only when component is invoked + owner.session.publishRules[source] = name + } + + void _into_publish(ChannelOut out, String name) { + if( out.size() != 1 ) + throw new IllegalArgumentException("Cannot send a multi-channel output into a topic") + _into_publish(out[0], name) + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy index 000488f84e..7a74cc4fff 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowDef.groovy @@ -16,20 +16,14 @@ package nextflow.script -import java.nio.file.Path -import java.nio.file.Paths - import groovy.transform.CompileStatic import groovy.transform.PackageScope import groovy.util.logging.Slf4j import groovyx.gpars.dataflow.DataflowWriteChannel -import nextflow.NF import nextflow.exception.MissingProcessException import nextflow.exception.MissingValueException import nextflow.exception.ScriptRuntimeException import nextflow.extension.CH -import nextflow.extension.IntoTopicOp -import nextflow.extension.PublishOp /** * Models a script workflow component * @@ -58,8 +52,6 @@ class WorkflowDef extends BindableDef implements ChainableDef, IterableDef, Exec private WorkflowBinding binding - private Closure publisher - WorkflowDef(BaseScript owner, Closure rawBody, String name=null) { this.owner = owner this.name = name @@ -78,10 +70,6 @@ class WorkflowDef extends BindableDef implements ChainableDef, IterableDef, Exec /* ONLY FOR TESTING PURPOSE */ protected WorkflowDef() {} - void setPublisher(Closure publisher) { - this.publisher = publisher - } - WorkflowDef clone() { final copy = (WorkflowDef)super.clone() copy.@body = body.clone() @@ -211,18 +199,11 @@ class WorkflowDef extends BindableDef implements ChainableDef, IterableDef, Exec collectInputs(binding, args) // invoke the workflow execution final closure = body.closure - 
closure.setDelegate(new WorkflowDsl(binding)) + closure.setDelegate(binding) closure.setResolveStrategy(Closure.DELEGATE_FIRST) closure.call() // collect the workflow outputs output = collectOutputs(declaredOutputs) - // publish the workflow outputs - if( publisher ) { - final cl = (Closure)publisher.clone() - cl.setDelegate(new WorkflowPublishDsl(binding)) - cl.setResolveStrategy(Closure.DELEGATE_FIRST) - cl.call() - } return output } @@ -254,178 +235,3 @@ class WorkflowParamsDsl { throw new MissingMethodException(name, WorkflowDef, args) } } - -/** - * Implements the DSL for executing the workflow - * - * @author Ben Sherman - */ -@Slf4j -@CompileStatic -class WorkflowDsl { - - private Binding binding - - WorkflowDsl(Binding binding) { - this.binding = binding - } - - @Override - Object getProperty(String name) { - try { - return binding.getProperty(name) - } - catch( MissingPropertyException e ){ - return super.getProperty(name) - } - } - - @Override - void setProperty(String name, Object value) { - binding.setProperty(name, value) - } - - @Override - Object invokeMethod(String name, Object args) { - if( name == '_into_topic' ) { - final args0 = args as Object[] - if( args0[0] instanceof DataflowWriteChannel ) - _into_topic(args0[0] as DataflowWriteChannel, args0[1] as String) - else if( args0[0] instanceof ChannelOut ) - _into_topic(args0[0] as ChannelOut, args0[1] as String) - else - throw new IllegalArgumentException("Workflow topic source should be a channel") - } - else - binding.invokeMethod(name, args) - } - - void _into_topic(DataflowWriteChannel source, String name) { - if( !NF.topicChannelEnabled ) - throw new ScriptRuntimeException("Workflow `topic:` section requires the `nextflow.preview.topic` feature flag") - new IntoTopicOp(CH.getReadChannel(source), name).apply() - } - - void _into_topic(ChannelOut out, String name) { - if( !NF.topicChannelEnabled ) - throw new ScriptRuntimeException("Workflow `topic:` section requires the `nextflow.preview.topic` feature flag") - if( out.size() != 1 ) - throw new IllegalArgumentException("Cannot send a multi-channel output into a topic") - _into_topic(out[0], name) - } - -} - -/** - * Implements the DSL for publishing workflow outputs - * - * @author Ben Sherman - */ -@CompileStatic -class WorkflowPublishDsl { - - private static final List PUBLISH_OPTIONS = List.of( - 'contentType', - 'enabled', - 'ignoreErrors', - 'mode', - 'overwrite', - 'pattern', - 'storageClass', - 'tags' - ) - - private Binding binding - - private Path directory = Paths.get('.').complete() - - private Map defaults = [:] - - private boolean directoryOnce = false - - WorkflowPublishDsl(Binding binding) { - this.binding = binding - } - - @Override - Object getProperty(String name) { - try { - return binding.getProperty(name) - } - catch( MissingPropertyException e ){ - return super.getProperty(name) - } - } - - void directory(Map defaults=[:], String directory) { - if( directoryOnce ) - throw new ScriptRuntimeException("Output directory cannot be defined more than once in the workflow output definition") - directoryOnce = true - - this.directory = (directory as Path).complete() - this.defaults = defaults - } - - void path(String path, Closure closure) { - final dsl = new PathDsl(directory.resolve(path), defaults) - final cl = (Closure)closure.clone() - cl.setResolveStrategy(Closure.DELEGATE_FIRST) - cl.setDelegate(dsl) - cl.call() - } - - class PathDsl { - - private Path path - private Map defaults - private boolean defaultsOnce = false - - PathDsl(Path path, Map 
defaults) { - this.path = path - this.defaults = defaults - } - - void defaults(Map opts) { - if( defaultsOnce ) - throw new ScriptRuntimeException("Publish defaults cannot be defined more than once for a given path") - defaultsOnce = true - - validatePublishOptions(opts) - defaults.putAll(opts) - } - - void path(String subpath, Closure closure) { - final dsl = new PathDsl(path.resolve(subpath), defaults) - final cl = (Closure)closure.clone() - cl.setResolveStrategy(Closure.DELEGATE_FIRST) - cl.setDelegate(dsl) - cl.call() - } - - void from(Map opts=[:], DataflowWriteChannel source) { - validatePublishOptions(opts) - if( opts.ignoreErrors ) - opts.failOnError = !opts.remove('ignoreErrors') - new PublishOp(CH.getReadChannel(source), defaults + opts + [path: path]).apply() - } - - void from(Map opts=[:], ChannelOut out) { - if( out.size() != 1 ) - throw new IllegalArgumentException("Cannot publish a multi-channel output") - from(opts, out[0]) - } - - void from(Map opts=[:], String name) { - if( !NF.topicChannelEnabled ) throw new ScriptRuntimeException("Topic selector in workflow output definition requires the `nextflow.preview.topic` feature flag") - from(opts, CH.topic(name)) - } - - private void validatePublishOptions(Map opts) { - for( final name : opts.keySet() ) - if( name !in PUBLISH_OPTIONS ) - throw new IllegalArgumentException("Unrecognized publish option '${name}' in the workflow output definition") - } - - } - -} diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index 61ee4bbb5e..1f81788f61 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -nextflow.preview.topic = true + process align { input: @@ -67,22 +67,21 @@ workflow { foo() - topic: - foo.out >> 'foo' + publish: + align.out[0] >> 'data' + align.out[1] >> 'data' + my_combine.out >> 'more/data' + foo.out >> 'data' } output { directory 'results' 'data' { - from align.out[0], mode: 'copy' - from align.out[1], mode: 'copy' - from my_combine.out - from 'foo', mode: 'link' + mode 'link' } - 'data/more' { - defaults mode: 'copy' - from my_combine.out + 'more/data' { + mode 'copy' } } From b67376c43a6b12edc9affb9a31e7220e2d042f36 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 12 Apr 2024 07:22:40 -0500 Subject: [PATCH 22/47] Add `publish:` section to process Signed-off-by: Ben Sherman --- .../nextflow/ast/NextflowDSLImpl.groovy | 26 +++++++++++++++++++ .../nextflow/script/ProcessConfig.groovy | 13 ++++++++++ .../groovy/nextflow/script/ProcessDef.groovy | 13 ++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy index 92232e32e3..043d85520e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy @@ -633,6 +633,11 @@ class NextflowDSLImpl implements ASTTransformation { } break + case 'publish': + if( stm instanceof ExpressionStatement ) + convertPublishMethod( stm ) + break + case 'exec': bodyLabel = currentLabel iterator.remove() @@ -1278,6 +1283,27 @@ class NextflowDSLImpl implements ASTTransformation { return false } + protected void convertPublishMethod(ExpressionStatement stmt) { + if( stmt.expression !instanceof BinaryExpression ) { + syntaxError(stmt, "Invalid process publish statement") + return + } + + final binaryX = 
(BinaryExpression)stmt.expression + if( binaryX.operation.type != Types.RIGHT_SHIFT ) { + syntaxError(stmt, "Invalid process publish statement") + return + } + + final left = binaryX.leftExpression + if( left !instanceof VariableExpression ) { + syntaxError(stmt, "Invalid process publish statement") + return + } + + stmt.expression = callThisX('_into_publish', args(constX(((VariableExpression)left).name), binaryX.rightExpression)) + } + protected boolean isIllegalName(String name, ASTNode node) { if( name in RESERVED_NAMES ) { unit.addError( new SyntaxException("Identifier `$name` is reserved for internal use", node.lineNumber, node.columnNumber+8) ) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy index 67bac675a7..e345d12b06 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy @@ -162,6 +162,8 @@ class ProcessConfig implements Map, Cloneable { */ private outputs = new OutputsList() + private Map publishRules = [:] + /** * Initialize the taskConfig object with the defaults values * @@ -512,6 +514,10 @@ class ProcessConfig implements Map, Cloneable { outputs } + Map getPublishRules() { + publishRules + } + /** * Implements the process {@code debug} directive. */ @@ -649,6 +655,13 @@ class ProcessConfig implements Map, Cloneable { result } + void _into_publish(String emit, String name) { + final emitNames = outputs.collect { param -> param.channelEmitName } + if( emit !in emitNames ) + throw new IllegalArgumentException("Invalid emit name '${emit}' in publish statement, valid emits are: ${emitNames.join(', ')}") + publishRules[emit] = name + } + /** * Defines a special *dummy* input parameter, when no inputs are * provided by the user for the current task diff --git a/modules/nextflow/src/main/groovy/nextflow/script/ProcessDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/ProcessDef.groovy index 3c54f0e426..c393fc5503 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/ProcessDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/ProcessDef.groovy @@ -18,6 +18,7 @@ package nextflow.script import groovy.transform.CompileStatic import groovy.util.logging.Slf4j +import groovyx.gpars.dataflow.DataflowWriteChannel import nextflow.Const import nextflow.Global import nextflow.Session @@ -206,7 +207,15 @@ class ProcessDef extends BindableDef implements IterableDef, ChainableDef { } // make a copy of the output list because execution can change it - final copyOuts = declaredOutputs.clone() + output = new ChannelOut(declaredOutputs.clone()) + + // register process publish rules + for( final entry : processConfig.getPublishRules() ) { + final emit = entry.key + final name = entry.value + final source = (DataflowWriteChannel)output.getProperty(emit) + session.publishRules[source] = name + } // create the executor final executor = session @@ -221,7 +230,7 @@ class ProcessDef extends BindableDef implements IterableDef, ChainableDef { // the result channels assert declaredOutputs.size()>0, "Process output should contains at least one channel" - return output = new ChannelOut(copyOuts) + return output } } From 65d9111bb571083ab5c8b92c57ad28443d412ee7 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 12 Apr 2024 10:51:32 -0500 Subject: [PATCH 23/47] Update docs Signed-off-by: Ben Sherman --- docs/channel.md | 41 +++----- docs/process.md | 2 +- docs/workflow.md | 
267 +++++++++++++++-------------------------------- 3 files changed, 104 insertions(+), 206 deletions(-) diff --git a/docs/channel.md b/docs/channel.md index e366fea419..5d615d1371 100644 --- a/docs/channel.md +++ b/docs/channel.md @@ -467,9 +467,17 @@ See also: [channel.fromList](#fromlist) factory method. This feature requires the `nextflow.preview.topic` feature flag to be enabled. ::: -A *topic channel*, similar to a *queue channel*, is a non-blocking unidirectional FIFO queue, with the ability to implicitly receive values from multiple sources based on a *topic name*. +A *topic* is a channel type introduced as of Nextflow 23.11.0-edge along with {ref}`channel-type-value` and +{ref}`channel-type-queue`. -A process output can be sent to a topic using the `topic` option, for example: +A *topic channel*, similarly to a *queue channel*, is non-blocking unidirectional FIFO queue, however it connects +multiple *producer* processes with multiple *consumer* processes or operators. + +:::{tip} +You can think about it as a channel that is shared across many different process using the same *topic name*. +::: + +A process output can be assigned to a topic using the `topic` option on an output, for example: ```groovy process foo { @@ -483,38 +491,21 @@ process bar { } ``` -Additionally, the `topic:` section of a workflow definition can be used to send channels defined in a workflow to a topic: - -```groovy -workflow foobar { - main: - foo() - bar() - - topic: - foo.out >> 'my_topic' - bar.out >> 'my_topic' - - emit: - bar.out -} -``` - -Finally, the `Channel.topic()` factory can be used to consume the resulting channel for a given topic name, which can be used like any other channel: +The `channel.topic` method allows referencing the topic channel with the specified name, which can be used as a process +input or operator composition as any other Nextflow channel: ```groovy channel.topic('my-topic').view() ``` -The same topic can be consumed using `Channel.topic()` any number of times, similar to referencing a channel multiple times. - -This approach is a convenient way to collect related items from many different sources without all of the logic that is required to connect them, e.g. using the `mix` operator. +This approach is a convenient way to collect related items from many different sources without explicitly defining +the logic connecting many different queue channels altogether, commonly using the `mix` operator. :::{warning} -Avoid creating a circular dependency within a topic (e.g. a process that consumes a channel topic and sends outputs to that same topic), as it will cause the pipeline to run forever. +Any process that consumes a channel topic should not send any outputs to that topic, or else the pipeline will hang forever. ::: -See also: {ref}`process-additional-options` for process outputs and the {ref}`workflow topic section `. +See also: {ref}`process-additional-options` for process outputs. (channel-value)= diff --git a/docs/process.md b/docs/process.md index 9a57dcf68b..30375ca5c0 100644 --- a/docs/process.md +++ b/docs/process.md @@ -2164,7 +2164,7 @@ The following options are available: ### publishDir :::{deprecated} 24.04.0 -The `publishDir` directive has been deprecated in favor of the new {ref}`workflow output definition `. +The `publishDir` directive has been deprecated in favor of the {ref}`workflow output definition `. ::: The `publishDir` directive allows you to publish the process output files to a specified folder. 
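As a rough sketch of the migration implied by this deprecation (the process name, file names, and paths below are illustrative and are not taken from this patch; note also that a later commit in this series renames the script-level `output` block to `publish`), a `publishDir`-based process and its equivalent using the workflow output definition might look like:

```groovy
// Deprecated style: publishing is configured on the process itself
process align {
    publishDir 'results/align', mode: 'copy'

    input:
    path fastq

    output:
    path 'aligned.bam', emit: bam

    script:
    """
    touch aligned.bam
    """
}

// Sketch of the equivalent using the output definition added in this series:
// the workflow maps the output channel to a named target, and the output
// block sets the top-level directory and default publish options.
workflow {
    align(Channel.fromPath(params.reads))

    publish:
    align.out.bam >> 'align/'
}

output {
    directory 'results'
    mode 'copy'
}
```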
For example: diff --git a/docs/workflow.md b/docs/workflow.md index bf493c09c7..b380ce7f79 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -147,35 +147,6 @@ workflow my_pipeline { The result of the above workflow can be accessed using `my_pipeline.out.my_data`. -(workflow-topics)= - -## Workflow topics (`topic`) - -:::{versionadded} 24.04.0 -::: - -:::{note} -This feature requires the `nextflow.preview.topic` feature flag to be enabled. -::: - -The `topic` section can be used to send channels defined in a workflow, including process and sub-workflow outputs, into a topic. For example: - -```groovy -workflow my_pipeline { - main: - foo(data) - bar(foo.out) - - topic: - foo.out >> 'foo' - - emit: - bar.out -} -``` - -In the above example, the channel `foo.out` (assumed to be a single channel) is sent to topic `foo`, without being emitted as a workflow output. - (workflow-process-invocation)= ## Invoking processes @@ -362,237 +333,173 @@ workflow { output { directory 'results' - - 'foo' { - select foo.out - } - - 'bar' { - defaults mode: 'copy', pattern: '*.txt' - select bar.out - } } ``` The output block must be defined after the implicit workflow. -### Output directory +### Publishing channels -The `directory` statement is used to set the top-level output directory of the workflow: +Processes and workflows can each define a `publish` section which maps channels to publish rules. For example: ```groovy -output { - directory 'results' - +process foo { // ... -} -``` - -It is optional, and it defaults to the launch directory (`workflow.launchDir`). - -### Path definitions - -Path definitions are used to define the directory structure of the published outputs. A path definition is a path name followed by a block which defines the outputs to be published within that path. Like directories, path definitions can be nested. - -The path name defines a subdirectory within the output directory, or the parent path if the path definition is nested. - -For example, given the following output block: - -```groovy -output { - directory 'results' - 'foo' { - // ... - } + output: + path 'result.txt', emit: results - 'bar' { - // ... + publish: + results >> 'foo/' - 'baz' { - // ... - } - } + // ... } -``` - -The following directory structure will be created by the workflow: - -``` -results/ -└── foo/ - └── ... -└── bar/ - └── baz/ - └── ... - └── ... -``` -The path name may also contain multiple subdirectories separated by a slash `/`: - -```groovy -output { - 'foo/bar/baz' { - // ... - } -} -``` +workflow foobar { + main: + foo(data) + bar(foo.out) -It is a shorthand for the following: + publish: + foo.out >> 'foobar/foo/' -```groovy -output { - 'foo' { - 'bar' { - 'baz' { - // ... - } - } - } + emit: + bar.out } ``` -### Selecting channels +In the above example, the output `results` of process `foo` is published to the rule `foo/` by default. However, when the workflow `foobar` invokes process `foo`, it publishes `foo.out` (i.e. `foo.out.results`) to the rule `foobar/foo/`, overriding the default rule defined by `foo`. -The `from` statement is used to select channels to publish: - -```groovy -output { - 'foo' { - from foo.out - } -} -``` - -Any channel defined in the implicit workflow can be selected, including process and workflow outputs. +In a process, any output with an `emit` name can be published. In a workflow, any channel defined in the workflow, including process and subworkflow outputs, can be published. :::{note} -A process/workflow output (e.g. 
`foo.out`) can only be selected directly if it contains a single output channel. Multi-channel outputs must be selected by index or name, e.g. `foo.out[0]` or `foo.out.samples`. +A process/workflow output (e.g. `foo.out`) can only be published directly if it contains a single output channel. Multi-channel outputs must be published by index or name (e.g. `foo.out[0]` or `foo.out.results`). ::: -By default, all files emitted by the channel will be published into the specified directory. If a list value emitted by the channel contains any files, including files within nested lists, they will also be published. For example: +As shown in the example, workflows can override the publish rules of process and subworkflow outputs. This way, each process and workflow can define some sensible defaults for publishing, which can be overridden by calling workflows as needed. + +By default, all files emitted by the channel will be published into the specified directory. If a channel emits list values, any files in the list (including nested lists) will also be published. For example: ```groovy workflow { ch_samples = Channel.of( [ [id: 'sample1'], file('sample1.txt') ] ) -} -output { - 'samples' { - // sample1.txt will be published - from ch_samples - } + publish: + ch_samples >> 'samples/' // sample1.txt will be published } ``` -The publishing behavior can be customized further by using [publish options](#publish-options). See that section for more details. - -### Selecting topics - -:::{note} -This feature requires the `nextflow.preview.topic` feature flag to be enabled. -::: +### Output directory -The `from` statement can also be used to select a topic by name: +The `directory` statement is used to set the top-level output directory of the workflow: ```groovy output { - 'samples' { - from 'samples' + directory 'results' - // equivalent to: - from Channel.topic('samples') - } + // ... } ``` -Topics are a useful way to publish channels which are deeply nested within workflows, without needing to propagate them to the top-level workflow. You can use the `topic:` workflow section, or the `topic` option for {ref}`process outputs `, to send a channel to a given topic. +It is optional, and it defaults to the launch directory (`workflow.launchDir`). Published files will be published into this directory. -### Publish options +### Publish rules -The publishing behavior can be configured using a set of options similar to those for the {ref}`process-publishdir` directive. +A publish rule is a specific publish configuration identified by a name. By default, when a channel is published to a rule in the `publish:` section of a process or workflow, the rule name is used as the publish path. -There are several ways to define publish options: +For example, given the following output block: -- The `directory` statement +```groovy +workflow { + ch_foo = foo() + ch_bar = bar(ch_foo) -- The `defaults` statement, which defines publish options for a path definition + publish: + ch_foo >> 'foo/' + ch_bar >> 'bar/' +} -- The `from` statement, which defines publish options for an individual selector +output { + directory 'results' +} +``` -Publish options are resolved in a cascading manner, in which more specific settings take priority. +The following directory structure will be created: -Consider the following example: +``` +results/ +└── foo/ + └── ... +└── bar/ + └── ... 
+``` + +:::{tip} +The trailing slash in the rule name is not required; it is only used to denote that the rule name is intended to be used as the publish path. In general, the rule name can be any string, but it should be a valid path name when using the default publishing behavior. +::: + +Publish rules can also be customized in the `output` block using a set of options similar to the {ref}`process-publishdir` directive. + +For example: ```groovy output { - directory 'results', mode: 'copy' - - 'samples' { - from ch_samples, pattern: '*.txt', mode: 'link' + directory 'results' + mode 'copy' - 'md5' { - defaults mode: 'link' - // ... - from 'md5', mode: 'copy' - } + 'foo/' { + enabled params.save_foo + mode 'link' } } ``` -In this example, the following rules are applied: +In this example, the following publish options are applied: - All files will be copied by default -- The channel selector `from ch_samples` will publish via hard link, overriding the output directory default. Additionally, only files matching the pattern `*.txt` will be published. - -- All files published to `samples/md5` will be hard-linked by default, overriding the output directory default. - -- The topic selector `from 'md5'` will publish via copy, overriding the default from `samples/md5`. +- Files published to `foo/` will be hard-linked, overriding the default option. Additionally, these files will be published only if `params.save_foo` is true. Available options: `contentType` -: :::{versionadded} 22.10.0 - ::: -: *Experimental: currently only supported for S3.* -: Allow specifying the media content type of the published file a.k.a. [MIME type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_Types). If set to `true`, the content type is inferred from the file extension (default: `false`). +: *Currently only supported for S3.* +: Specify the media type a.k.a. [MIME type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_Types) of published files (default: `false`). Can be a string (e.g. `'text/html'`), or `true` to infer the content type from the file extension. `enabled` : Enable or disable publishing (default: `true`). `ignoreErrors` -: When `true`, the pipeline will not fail if a file can't be published for any reason (default: `false`). +: When `true`, the workflow will not fail if a file can't be published for some reason (default: `false`). `mode` -: The file publishing method. Can be one of the following values: +: The file publishing method (default: `'symlink'`). Can be one of the following values: - - `'copy'`: Copies the output files into the publish directory. - - `'copyNoFollow'`: Copies the output files into the publish directory without following symlinks ie. copies the links themselves. - - `'link'`: Creates a hard link in the publish directory for each output file. - - `'move'`: Moves the output files into the publish directory. **Note**: this is only supposed to be used for a *terminal* process i.e. a process whose output is not consumed by any other downstream process. - - `'rellink'`: Creates a relative symbolic link in the publish directory for each output file. - - `'symlink'`: Creates an absolute symbolic link in the publish directory for each output file (default). + - `'copy'`: Copy each file into the output directory. + - `'copyNoFollow'`: Copy each file into the output directory without following symlinks, i.e. only the link is copied. + - `'link'`: Create a hard link in the output directory for each file. 
+ - `'move'`: Move each file into the output directory. **Note**: should only be used for files which are not used by downstream processes in the workflow. + - `'rellink'`: Create a relative symbolic link in the output directory for each file. + - `'symlink'`: Create an absolute symbolic link in the output directory for each output file. `overwrite` -: When `true` any existing file in the specified folder will be overwritten (default: `false` if the task was cached on a resumed run, `true` otherwise). +: When `true` any existing file in the specified folder will be overwritten (default: `true`). -`pattern` -: Specifies a [glob][http://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob] file pattern that selects which files to publish from the source channel. +`path` +: Specify the publish path relative to the output directory (default: the rule name). Can only be specified within a rule. `storageClass` -: :::{versionadded} 22.12.0-edge - ::: -: *Experimental: currently only supported for S3.* -: Allow specifying the storage class to be used for the published file. +: *Currently only supported for S3.* +: Specify the storage class for published files. `tags` -: :::{versionadded} 21.12.0-edge - ::: -: *Experimental: currently only supported for S3.* -: Allow the association of arbitrary tags with the published file e.g. `tags: [FOO: 'Hello world']`. +: *Currently only supported for S3.* +: Specify arbitrary tags for published files. For example: + ```groovy + tags FOO: 'hello', BAR: 'world' + ``` ## Special operators From 35df9248ac3955e3d9cf943090c661e69f03e2b5 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 12 Apr 2024 10:51:43 -0500 Subject: [PATCH 24/47] Add publish options to output DSL Signed-off-by: Ben Sherman --- .../groovy/nextflow/script/OutputDef.groovy | 79 +++++++++++++------ 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy index 9b056e7c1a..c148145f54 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy @@ -73,15 +73,36 @@ class OutputDsl { this.directory = (directory as Path).complete() } - // TODO: other publish options - // - contentType - // - ignoreErrors - // - overwrite - // - storageClass - // - tags + void contentType(String value) { + setDefault('contentType', value) + } + + void contentType(boolean value) { + setDefault('contentType', value) + } + + void enabled(boolean value) { + setDefault('enabled', value) + } + + void ignoreErrors(boolean value) { + setDefault('ignoreErrors', value) + } + + void mode(String value) { + setDefault('mode', value) + } + + void overwrite(boolean value) { + setDefault('overwrite', value) + } - void mode(String mode) { - setDefault('mode', mode) + void storageClass(String value) { + setDefault('storageClass', value) + } + + void tags(Map value) { + setDefault('tags', value) } private void setDefault(String name, Object value) { @@ -126,32 +147,42 @@ class OutputDsl { static class RuleDsl { - // TODO: - // - contentType - // - pattern (?) 
- // - storageClass - // - tags - private Map opts = [:] - void enabled(boolean enabled) { - setOption('enabled', enabled) + void contentType(String value) { + setOption('contentType', value) + } + + void contentType(boolean value) { + setOption('contentType', value) + } + + void enabled(boolean value) { + setOption('enabled', value) + } + + void ignoreErrors(boolean value) { + setOption('ignoreErrors', value) + } + + void mode(String value) { + setOption('mode', value) } - void ignoreErrors(boolean ignoreErrors) { - setOption('ignoreErrors', ignoreErrors) + void overwrite(boolean value) { + setOption('overwrite', value) } - void mode(String mode) { - setOption('mode', mode) + void path(String value) { + setOption('path', value) } - void overwrite(boolean overwrite) { - setOption('overwrite', overwrite) + void storageClass(String value) { + setOption('storageClass', value) } - void path(String path) { - setOption('path', path) + void tags(Map value) { + setOption('tags', value) } private void setOption(String name, Object value) { From 509f271e70bd99f0dd867ea07ecf9c7831342091 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 12 Apr 2024 10:52:08 -0500 Subject: [PATCH 25/47] Change publish op to handle multiple task dirs Signed-off-by: Ben Sherman --- .../nextflow/extension/PublishOp.groovy | 61 ++++++++----------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index b70758bd33..0b330de2b3 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -34,57 +34,54 @@ class PublishOp { private DataflowReadChannel source - private Map opts - private PublishDir publisher - private Path sourceDir - private volatile boolean complete private Session getSession() { Global.session as Session } PublishOp(DataflowReadChannel source, Map opts) { this.source = source - this.opts = opts ? 
new LinkedHashMap(opts) : Collections.emptyMap() - this.publisher = PublishDir.create(this.opts) + this.publisher = PublishDir.create(opts) } - protected boolean getComplete() { complete } + boolean getComplete() { complete } PublishOp apply() { final events = new HashMap(2) - events.onNext = this.&publish0 - events.onComplete = this.&done0 + events.onNext = this.&onNext + events.onComplete = this.&onComplete DataflowHelper.subscribeImpl(source, events) return this } - protected void publish0(entry) { - log.trace "Publish operator received: $entry" - sourceDir = null - // use a set to avoid duplicates - final result = new HashSet(10) - collectFiles(entry, result) - publisher.apply(result, sourceDir) + protected void onNext(value) { + log.trace "Publish operator received: $value" + final result = collectFiles([:], value) + for( final entry : result ) { + final sourceDir = entry.key + final files = entry.value + publisher.apply(files, sourceDir) + } } - protected void done0(nope) { + protected void onComplete(nope) { log.trace "Publish operator complete" this.complete = true } - protected void collectFiles(entry, Collection result) { - if( entry instanceof Path ) { - result.add(entry) - if( sourceDir == null ) - sourceDir = getTaskDir(entry) + protected Map> collectFiles(Map> result, value) { + if( value instanceof Path ) { + final sourceDir = getTaskDir(value) + if( sourceDir !in result ) + result[sourceDir] = new HashSet(10) + result[sourceDir] << value } - else if( entry instanceof List ) { - for( def x : entry ) { - collectFiles(x, result) - } + else if( value instanceof Collection ) { + for( final el : value ) + collectFiles(result, el) } + return result } /** @@ -93,17 +90,13 @@ class PublishOp { * two sub-directories eg work-dir/xx/yyyyyy/etc * * @param path - * @return */ protected Path getTaskDir(Path path) { if( path == null ) return null - def result = getTaskDir0(path, session.workDir.resolve('tmp')) - if( result == null ) - result = getTaskDir0(path, session.workDir) - if( result == null ) - result = getTaskDir0(path, session.bucketDir) - return result + return getTaskDir0(path, session.workDir.resolve('tmp')) + ?: getTaskDir0(path, session.workDir) + ?: getTaskDir0(path, session.bucketDir) } private Path getTaskDir0(Path file, Path base) { @@ -112,7 +105,7 @@ class PublishOp { if( base.fileSystem != file.fileSystem ) return null final len = base.nameCount - if( file.startsWith(base) && file.getNameCount()>len+2 ) + if( file.startsWith(base) && file.getNameCount() > len+2 ) return base.resolve(file.subpath(len,len+2)) return null } From 6fcbf6f61932683f6984f1dc692bb794fcc0fda5 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 15 Apr 2024 12:48:01 -0500 Subject: [PATCH 26/47] Fix error when no output block is specified Signed-off-by: Ben Sherman --- .../nextflow/src/main/groovy/nextflow/script/BaseScript.groovy | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy index 9f46abe314..8b7a2e0329 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy @@ -193,7 +193,8 @@ abstract class BaseScript extends Script implements ExecutionContext { // invoke the entry workflow session.notifyBeforeWorkflowExecution() final ret = entryFlow.invoke_a(BaseScriptConsts.EMPTY_ARGS) - publisher.run(session.publishRules) + if( publisher ) + 
publisher.run(session.publishRules) session.notifyAfterWorkflowExecution() return ret } From 550766a31eae9ff78274ba31a118a40ca93141ab Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 15 Apr 2024 13:45:59 -0500 Subject: [PATCH 27/47] Rename output -> publish, rule -> target Signed-off-by: Ben Sherman --- docs/process.md | 2 +- docs/workflow.md | 40 ++++++++--------- .../src/main/groovy/nextflow/Session.groovy | 2 +- .../nextflow/ast/NextflowDSLImpl.groovy | 44 +++++++++---------- .../groovy/nextflow/script/BaseScript.groovy | 12 ++--- .../nextflow/script/ProcessConfig.groovy | 16 ++++--- .../groovy/nextflow/script/ProcessDef.groovy | 6 +-- .../{OutputDef.groovy => PublishDef.groovy} | 40 ++++++++--------- .../nextflow/script/WorkflowBinding.groovy | 12 +++-- tests/publish-dsl.nf | 2 +- 10 files changed, 90 insertions(+), 86 deletions(-) rename modules/nextflow/src/main/groovy/nextflow/script/{OutputDef.groovy => PublishDef.groovy} (79%) diff --git a/docs/process.md b/docs/process.md index 30375ca5c0..203de7a8e9 100644 --- a/docs/process.md +++ b/docs/process.md @@ -2164,7 +2164,7 @@ The following options are available: ### publishDir :::{deprecated} 24.04.0 -The `publishDir` directive has been deprecated in favor of the {ref}`workflow output definition `. +The `publishDir` directive has been deprecated in favor of the {ref}`workflow publish definition `. ::: The `publishDir` directive allows you to publish the process output files to a specified folder. For example: diff --git a/docs/workflow.md b/docs/workflow.md index b380ce7f79..6a05dd4156 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -317,30 +317,30 @@ Each workflow invocation has its own scope. As a result, the same process can be The fully qualified process name can be used as a {ref}`process selector ` in a Nextflow configuration file, and it takes priority over the simple process name. ::: -(workflow-output-dsl)= +(workflow-publish-def)= ## Publishing outputs :::{versionadded} 24.04.0 ::: -A script may define the set of outputs that should be published by the implicit workflow, known as the workflow output definition or "output block": +A script may define the set of outputs that should be published by the implicit workflow, known as the workflow publish definition: ```groovy workflow { foo(bar()) } -output { +publish { directory 'results' } ``` -The output block must be defined after the implicit workflow. +The publish definition must be defined after the implicit workflow. ### Publishing channels -Processes and workflows can each define a `publish` section which maps channels to publish rules. For example: +Processes and workflows can each define a `publish` section which maps channels to publish targets. For example: ```groovy process foo { @@ -368,7 +368,7 @@ workflow foobar { } ``` -In the above example, the output `results` of process `foo` is published to the rule `foo/` by default. However, when the workflow `foobar` invokes process `foo`, it publishes `foo.out` (i.e. `foo.out.results`) to the rule `foobar/foo/`, overriding the default rule defined by `foo`. +In the above example, the output `results` of process `foo` is published to the target `foo/` by default. However, when the workflow `foobar` invokes process `foo`, it publishes `foo.out` (i.e. `foo.out.results`) to the target `foobar/foo/`, overriding the default target defined by `foo`. In a process, any output with an `emit` name can be published. 
In a workflow, any channel defined in the workflow, including process and subworkflow outputs, can be published. @@ -376,7 +376,7 @@ In a process, any output with an `emit` name can be published. In a workflow, an A process/workflow output (e.g. `foo.out`) can only be published directly if it contains a single output channel. Multi-channel outputs must be published by index or name (e.g. `foo.out[0]` or `foo.out.results`). ::: -As shown in the example, workflows can override the publish rules of process and subworkflow outputs. This way, each process and workflow can define some sensible defaults for publishing, which can be overridden by calling workflows as needed. +As shown in the example, workflows can override the publish targets of process and subworkflow outputs. This way, each process and workflow can define some sensible defaults for publishing, which can be overridden by calling workflows as needed. By default, all files emitted by the channel will be published into the specified directory. If a channel emits list values, any files in the list (including nested lists) will also be published. For example: @@ -391,25 +391,25 @@ workflow { } ``` -### Output directory +### Publish directory -The `directory` statement is used to set the top-level output directory of the workflow: +The `directory` statement is used to set the top-level publish directory of the workflow: ```groovy -output { +publish { directory 'results' // ... } ``` -It is optional, and it defaults to the launch directory (`workflow.launchDir`). Published files will be published into this directory. +It is optional, and it defaults to the launch directory (`workflow.launchDir`). Published files will be saved within this directory. -### Publish rules +### Publish targets -A publish rule is a specific publish configuration identified by a name. By default, when a channel is published to a rule in the `publish:` section of a process or workflow, the rule name is used as the publish path. +A publish target is a name with a specific publish configuration. By default, when a channel is published to a target in the `publish:` section of a process or workflow, the target name is used as the publish path. -For example, given the following output block: +For example, given the following publish definition: ```groovy workflow { @@ -421,7 +421,7 @@ workflow { ch_bar >> 'bar/' } -output { +publish { directory 'results' } ``` @@ -436,16 +436,16 @@ results/ └── ... ``` -:::{tip} -The trailing slash in the rule name is not required; it is only used to denote that the rule name is intended to be used as the publish path. In general, the rule name can be any string, but it should be a valid path name when using the default publishing behavior. +:::{note} +The trailing slash in the target name is not required; it is only used to denote that the target name is intended to be used as the publish path. In general, the target name can be any string, but it should be a valid path name when using the default publishing behavior. ::: -Publish rules can also be customized in the `output` block using a set of options similar to the {ref}`process-publishdir` directive. +Publish targets can also be customized in the publish definition using a set of options similar to the {ref}`process-publishdir` directive. For example: ```groovy -output { +publish { directory 'results' mode 'copy' @@ -488,7 +488,7 @@ Available options: : When `true` any existing file in the specified folder will be overwritten (default: `true`). 
`path` -: Specify the publish path relative to the output directory (default: the rule name). Can only be specified within a rule. +: Specify the publish path relative to the output directory (default: the target name). Can only be specified within a target definition. `storageClass` : *Currently only supported for S3.* diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 5ab61e41c7..ddba6c344d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -95,7 +95,7 @@ class Session implements ISession { final List igniters = new ArrayList<>(20) - final Map publishRules = [:] + final Map publishTargets = [:] /** * Creates process executors diff --git a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy index 043d85520e..7f058b859d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy @@ -179,8 +179,8 @@ class NextflowDSLImpl implements ASTTransformation { super.visitMethodCallExpression(methodCall) } - else if( methodName == 'output' && preCondition ) { - convertOutputDef(methodCall,sourceUnit) + else if( methodName == 'publish' && preCondition ) { + convertPublishDef(methodCall,sourceUnit) super.visitMethodCallExpression(methodCall) } @@ -432,17 +432,17 @@ class NextflowDSLImpl implements ASTTransformation { protected Statement normWorkflowPublish(ExpressionStatement stm) { if( stm.expression !instanceof BinaryExpression ) { - syntaxError(stm, "Workflow malformed publish statement") + syntaxError(stm, "Invalid workflow publish statement") return stm } final binaryX = (BinaryExpression)stm.expression if( binaryX.operation.type != Types.RIGHT_SHIFT ) { - syntaxError(stm, "Workflow malformed publish statement") + syntaxError(stm, "Invalid workflow publish statement") return stm } - return stmt( callThisX('_into_publish', args(binaryX.leftExpression, binaryX.rightExpression)) ) + return stmt( callThisX('_publish_target', args(binaryX.leftExpression, binaryX.rightExpression)) ) } protected Expression makeWorkflowDefWrapper( ClosureExpression closure, boolean anonymous ) { @@ -486,7 +486,7 @@ class NextflowDSLImpl implements ASTTransformation { case WORKFLOW_PUBLISH: if( !(stm instanceof ExpressionStatement) ) { - syntaxError(stm, "Workflow malformed publish statement") + syntaxError(stm, "Invalid workflow publish statement") break } body.add(normWorkflowPublish(stm as ExpressionStatement)) @@ -519,29 +519,29 @@ class NextflowDSLImpl implements ASTTransformation { } /** - * Transform rules in the workflow output definition: + * Transform targets in the workflow publish definition: * - * output { + * publish { * 'foo' { ... } * } * * becomes: * - * output { - * rule('foo') { ... } + * publish { + * target('foo') { ... 
} * } * * @param methodCall * @param unit */ - protected void convertOutputDef(MethodCallExpression methodCall, SourceUnit unit) { - log.trace "Convert 'output' ${methodCall.arguments}" + protected void convertPublishDef(MethodCallExpression methodCall, SourceUnit unit) { + log.trace "Convert 'publish' ${methodCall.arguments}" assert methodCall.arguments instanceof ArgumentListExpression final arguments = (ArgumentListExpression)methodCall.arguments if( arguments.size() != 1 || arguments[0] !instanceof ClosureExpression ) { - syntaxError(methodCall, "Invalid output definition") + syntaxError(methodCall, "Invalid publish definition") return } @@ -549,28 +549,28 @@ class NextflowDSLImpl implements ASTTransformation { final block = (BlockStatement)closure.code for( Statement stmt : block.statements ) { if( stmt !instanceof ExpressionStatement ) { - syntaxError(stmt, "Invalid output rule definition") + syntaxError(stmt, "Invalid publish target definition") return } final stmtExpr = (ExpressionStatement)stmt if( stmtExpr.expression !instanceof MethodCallExpression ) { - syntaxError(stmt, "Invalid output rule definition") + syntaxError(stmt, "Invalid publish target definition") return } final call = (MethodCallExpression)stmtExpr.expression assert call.arguments instanceof ArgumentListExpression - // HACK: rule definition is a method call with single closure argument + // HACK: target definition is a method call with single closure argument // custom parser will be able to detect more elegantly - final ruleArgs = (ArgumentListExpression)call.arguments - if( ruleArgs.size() != 1 || ruleArgs[0] !instanceof ClosureExpression ) + final targetArgs = (ArgumentListExpression)call.arguments + if( targetArgs.size() != 1 || targetArgs[0] !instanceof ClosureExpression ) continue - final ruleName = call.method - final ruleBody = (ClosureExpression)ruleArgs[0] - stmtExpr.expression = callThisX('rule', args(ruleName, ruleBody)) + final targetName = call.method + final targetBody = (ClosureExpression)targetArgs[0] + stmtExpr.expression = callThisX('target', args(targetName, targetBody)) } } @@ -1301,7 +1301,7 @@ class NextflowDSLImpl implements ASTTransformation { return } - stmt.expression = callThisX('_into_publish', args(constX(((VariableExpression)left).name), binaryX.rightExpression)) + stmt.expression = callThisX('_publish_target', args(constX(((VariableExpression)left).name), binaryX.rightExpression)) } protected boolean isIllegalName(String name, ASTNode node) { diff --git a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy index 8b7a2e0329..cf2031f184 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy @@ -41,7 +41,7 @@ abstract class BaseScript extends Script implements ExecutionContext { private WorkflowDef entryFlow - private OutputDef publisher + private PublishDef publisher @Lazy InputStream stdin = { System.in }() @@ -122,13 +122,13 @@ abstract class BaseScript extends Script implements ExecutionContext { meta.addDefinition(workflow) } - protected output(Closure closure) { + protected publish(Closure closure) { if( !entryFlow ) - throw new IllegalStateException("Workflow output definition must be defined after the anonymous workflow") + throw new IllegalStateException("Workflow publish definition must be defined after the anonymous workflow") if( ExecutionStack.withinWorkflow() ) - throw new 
IllegalStateException("Workflow output definition is not allowed within a workflow") + throw new IllegalStateException("Workflow publish definition is not allowed within a workflow") - publisher = new OutputDef(closure) + publisher = new PublishDef(closure) } protected IncludeDef include( IncludeDef include ) { @@ -194,7 +194,7 @@ abstract class BaseScript extends Script implements ExecutionContext { session.notifyBeforeWorkflowExecution() final ret = entryFlow.invoke_a(BaseScriptConsts.EMPTY_ARGS) if( publisher ) - publisher.run(session.publishRules) + publisher.run(session.publishTargets) session.notifyAfterWorkflowExecution() return ret } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy index e345d12b06..fd7ed72f7b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy @@ -162,7 +162,10 @@ class ProcessConfig implements Map, Cloneable { */ private outputs = new OutputsList() - private Map publishRules = [:] + /** + * Map of default publish targets + */ + private Map publishTargets = [:] /** * Initialize the taskConfig object with the defaults values @@ -514,8 +517,11 @@ class ProcessConfig implements Map, Cloneable { outputs } - Map getPublishRules() { - publishRules + /** + * Typed shortcut to {@code #publishTargets} + */ + Map getPublishTargets() { + publishTargets } /** @@ -655,11 +661,11 @@ class ProcessConfig implements Map, Cloneable { result } - void _into_publish(String emit, String name) { + void _publish_target(String emit, String name) { final emitNames = outputs.collect { param -> param.channelEmitName } if( emit !in emitNames ) throw new IllegalArgumentException("Invalid emit name '${emit}' in publish statement, valid emits are: ${emitNames.join(', ')}") - publishRules[emit] = name + publishTargets[emit] = name } /** diff --git a/modules/nextflow/src/main/groovy/nextflow/script/ProcessDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/ProcessDef.groovy index c393fc5503..f7e59b371e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/ProcessDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/ProcessDef.groovy @@ -209,12 +209,12 @@ class ProcessDef extends BindableDef implements IterableDef, ChainableDef { // make a copy of the output list because execution can change it output = new ChannelOut(declaredOutputs.clone()) - // register process publish rules - for( final entry : processConfig.getPublishRules() ) { + // register process publish targets + for( final entry : processConfig.getPublishTargets() ) { final emit = entry.key final name = entry.value final source = (DataflowWriteChannel)output.getProperty(emit) - session.publishRules[source] = name + session.publishTargets[source] = name } // create the executor diff --git a/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy similarity index 79% rename from modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy rename to modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy index c148145f54..9bfcc863b2 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy @@ -26,28 +26,28 @@ import nextflow.exception.ScriptRuntimeException import nextflow.extension.CH import 
nextflow.extension.PublishOp /** - * Models a workflow output definition + * Models the workflow publish definition * * @author Ben Sherman */ @Slf4j @CompileStatic -class OutputDef { +class PublishDef { private Closure closure - OutputDef(Closure closure) { + PublishDef(Closure closure) { this.closure = closure } - void run(Map sources) { - final dsl = new OutputDsl() + void run(Map targets) { + final dsl = new PublishDsl() final cl = (Closure)closure.clone() cl.setDelegate(dsl) cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.call() - dsl.build(sources) + dsl.build(targets) } } @@ -59,9 +59,9 @@ class OutputDef { */ @Slf4j @CompileStatic -class OutputDsl { +class PublishDsl { - private Map rules = [:] + private Map targetConfigs = [:] private Path directory @@ -69,7 +69,7 @@ class OutputDsl { void directory(String directory) { if( this.directory ) - throw new ScriptRuntimeException("Output directory cannot be defined more than once in the workflow outputs") + throw new ScriptRuntimeException("Publish directory cannot be defined more than once in the workflow publish definition") this.directory = (directory as Path).complete() } @@ -107,28 +107,28 @@ class OutputDsl { private void setDefault(String name, Object value) { if( defaults.containsKey(name) ) - throw new ScriptRuntimeException("Default `${name}` option cannot be defined more than once in the workflow outputs") + throw new ScriptRuntimeException("Default `${name}` option cannot be defined more than once in the workflow publish definition") defaults[name] = value } - void rule(String name, Closure closure) { - if( rules.containsKey(name) ) - throw new ScriptRuntimeException("Publish rule '${name}' is defined more than once in the workflow outputs") + void target(String name, Closure closure) { + if( targetConfigs.containsKey(name) ) + throw new ScriptRuntimeException("Target '${name}' is defined more than once in the workflow publish definition") - final dsl = new RuleDsl() + final dsl = new TargetDsl() final cl = (Closure)closure.clone() cl.setResolveStrategy(Closure.DELEGATE_FIRST) cl.setDelegate(dsl) cl.call() - rules[name] = dsl.getOptions() + targetConfigs[name] = dsl.getOptions() } - void build(Map sources) { - for( final entry : sources ) { + void build(Map targets) { + for( final entry : targets ) { final source = entry.key final name = entry.value - final opts = publishOptions(name, rules[name] ?: [:]) + final opts = publishOptions(name, targetConfigs[name] ?: [:]) new PublishOp(CH.getReadChannel(source), opts).apply() } @@ -145,7 +145,7 @@ class OutputDsl { return opts } - static class RuleDsl { + static class TargetDsl { private Map opts = [:] @@ -187,7 +187,7 @@ class OutputDsl { private void setOption(String name, Object value) { if( opts.containsKey(name) ) - throw new ScriptRuntimeException("Publish option `${name}` cannot be defined more than once in a given rule") + throw new ScriptRuntimeException("Publish option `${name}` cannot be defined more than once for a given target") opts[name] = value } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy index bc0d2b3035..5bb4a452de 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy @@ -156,16 +156,14 @@ class WorkflowBinding extends Binding { } } - void _into_publish(DataflowWriteChannel source, String name) { - // TODO: add rules to "default" mapping 
for component - // then add to workflow publisher only when component is invoked - owner.session.publishRules[source] = name + void _publish_target(DataflowWriteChannel source, String name) { + owner.session.publishTargets[source] = name } - void _into_publish(ChannelOut out, String name) { + void _publish_target(ChannelOut out, String name) { if( out.size() != 1 ) - throw new IllegalArgumentException("Cannot send a multi-channel output into a topic") - _into_publish(out[0], name) + throw new IllegalArgumentException("Cannot publish a multi-channel output") + _publish_target(out[0], name) } } diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index 1f81788f61..da4ccf5065 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -74,7 +74,7 @@ workflow { foo.out >> 'data' } -output { +publish { directory 'results' 'data' { From 72d8a6f5aeea1bc15132af2be44370f44917a79f Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 15 Apr 2024 13:56:36 -0500 Subject: [PATCH 28/47] Disallow absolute path in publish target Signed-off-by: Ben Sherman --- docs/workflow.md | 6 +++++- .../src/main/groovy/nextflow/script/PublishDef.groovy | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/workflow.md b/docs/workflow.md index 6a05dd4156..042b5434e9 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -437,7 +437,11 @@ results/ ``` :::{note} -The trailing slash in the target name is not required; it is only used to denote that the target name is intended to be used as the publish path. In general, the target name can be any string, but it should be a valid path name when using the default publishing behavior. +The trailing slash in the target name is not required; it is only used to denote that the target name is intended to be used as the publish path. +::: + +:::{warning} +The target name must not begin with a slash (`/`), it should be a relative path name. ::: Publish targets can also be customized in the publish definition using a set of options similar to the {ref}`process-publishdir` directive. 
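As a minimal sketch (the target name `'data'` and the path `'alignments'` are illustrative, not taken from this patch), a workflow can publish a channel to a named target and the publish definition can remap that target to a custom relative path:

```groovy
workflow {
    foo()

    publish:
    foo.out >> 'data'
}

publish {
    directory 'results'

    'data' {
        // published under results/alignments instead of results/data
        path 'alignments'
        mode 'copy'
    }
}
```

With this change, a target name or `path` option beginning with `/` is rejected at runtime, since publish paths must stay relative to the publish directory.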
diff --git a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy index 9bfcc863b2..b0cddbb5ec 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy @@ -141,7 +141,11 @@ class PublishDsl { final opts = defaults + overrides if( opts.containsKey('ignoreErrors') ) opts.failOnError = !opts.remove('ignoreErrors') - opts.path = directory.resolve(opts.path as String ?: name) + + final path = opts.path as String ?: name + if( path.startsWith('/') ) + throw new ScriptRuntimeException("Invalid publish target path '${path}' -- it should be a relative path") + opts.path = directory.resolve(path) return opts } From 370aa0becea1f13e0ffe820bffa96bf57fd89271 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 15 Apr 2024 14:56:20 -0500 Subject: [PATCH 29/47] Add feature flag Signed-off-by: Ben Sherman --- docs/config.md | 9 +++++++ docs/process.md | 4 ---- docs/workflow.md | 4 ++++ .../src/main/groovy/nextflow/NF.groovy | 4 ++++ .../main/groovy/nextflow/NextflowMeta.groovy | 24 ++++++++++++------- .../groovy/nextflow/script/BaseScript.groovy | 3 +++ tests/publish-dsl.nf | 2 +- 7 files changed, 36 insertions(+), 14 deletions(-) diff --git a/docs/config.md b/docs/config.md index a3cd1e34db..c9d9998b45 100644 --- a/docs/config.md +++ b/docs/config.md @@ -2072,6 +2072,15 @@ Some features can be enabled using the `nextflow.enable` and `nextflow.preview` - Nextflow will fail if multiple functions and/or processes with the same name are defined in a module script +`nextflow.preview.publish` + +: :::{versionadded} 24.04.0 + ::: + +: *Experimental: may change in a future release.* + +: When `true`, enables the use of the {ref}`workflow publish definition `. + `nextflow.preview.recursion` : :::{versionadded} 21.11.0-edge diff --git a/docs/process.md b/docs/process.md index 203de7a8e9..6b5837ba71 100644 --- a/docs/process.md +++ b/docs/process.md @@ -2163,10 +2163,6 @@ The following options are available: ### publishDir -:::{deprecated} 24.04.0 -The `publishDir` directive has been deprecated in favor of the {ref}`workflow publish definition `. -::: - The `publishDir` directive allows you to publish the process output files to a specified folder. For example: ```groovy diff --git a/docs/workflow.md b/docs/workflow.md index 042b5434e9..907ac7398f 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -324,6 +324,10 @@ The fully qualified process name can be used as a {ref}`process selector Date: Mon, 15 Apr 2024 15:20:42 -0500 Subject: [PATCH 30/47] Allow multi-channel output to be published Signed-off-by: Ben Sherman --- docs/workflow.md | 2 +- .../src/main/groovy/nextflow/script/WorkflowBinding.groovy | 5 ++--- tests/publish-dsl.nf | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/workflow.md b/docs/workflow.md index 907ac7398f..1c31d2cd9c 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -377,7 +377,7 @@ In the above example, the output `results` of process `foo` is published to the In a process, any output with an `emit` name can be published. In a workflow, any channel defined in the workflow, including process and subworkflow outputs, can be published. :::{note} -A process/workflow output (e.g. `foo.out`) can only be published directly if it contains a single output channel. Multi-channel outputs must be published by index or name (e.g. `foo.out[0]` or `foo.out.results`). 
+If a process/workflow output (e.g. `foo.out`) contains multiple channels, each channel will be published. Individual output channels can also be published by index or name (e.g. `foo.out[0]` or `foo.out.results`). ::: As shown in the example, workflows can override the publish targets of process and subworkflow outputs. This way, each process and workflow can define some sensible defaults for publishing, which can be overridden by calling workflows as needed. diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy index 5bb4a452de..105d67a246 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy @@ -161,9 +161,8 @@ class WorkflowBinding extends Binding { } void _publish_target(ChannelOut out, String name) { - if( out.size() != 1 ) - throw new IllegalArgumentException("Cannot publish a multi-channel output") - _publish_target(out[0], name) + for( final ch : out ) + _publish_target(ch, name) } } diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index 3ef56f2988..d02e1c93c8 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -68,8 +68,7 @@ workflow { foo() publish: - align.out[0] >> 'data' - align.out[1] >> 'data' + align.out >> 'data' my_combine.out >> 'more/data' foo.out >> 'data' } From 0833e1155bfbd42411b61d7266680b8b0e78f65d Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 16 Apr 2024 00:44:33 -0500 Subject: [PATCH 31/47] Add overwrite modes for deep / lenient / standard hash comparison Signed-off-by: Ben Sherman --- docs/workflow.md | 43 +++++++++++++++---- .../nextflow/processor/PublishDir.groovy | 23 ++++++++-- .../groovy/nextflow/script/PublishDef.groovy | 8 ++++ .../src/main/nextflow/util/CacheHelper.java | 30 ++++++++----- 4 files changed, 81 insertions(+), 23 deletions(-) diff --git a/docs/workflow.md b/docs/workflow.md index 1c31d2cd9c..15591dad7d 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -483,17 +483,44 @@ Available options: : When `true`, the workflow will not fail if a file can't be published for some reason (default: `false`). `mode` -: The file publishing method (default: `'symlink'`). Can be one of the following values: +: The file publishing method (default: `'symlink'`). The following options are available: - - `'copy'`: Copy each file into the output directory. - - `'copyNoFollow'`: Copy each file into the output directory without following symlinks, i.e. only the link is copied. - - `'link'`: Create a hard link in the output directory for each file. - - `'move'`: Move each file into the output directory. **Note**: should only be used for files which are not used by downstream processes in the workflow. - - `'rellink'`: Create a relative symbolic link in the output directory for each file. - - `'symlink'`: Create an absolute symbolic link in the output directory for each output file. + `'copy'` + : Copy each file into the output directory. + + `'copyNoFollow'` + : Copy each file into the output directory without following symlinks, i.e. only the link is copied. + + `'link'` + : Create a hard link in the output directory for each file. + + `'move'` + : Move each file into the output directory. + : Should only be used for files which are not used by downstream processes in the workflow. + + `'rellink'` + : Create a relative symbolic link in the output directory for each file. 
+ + `'symlink'` + : Create an absolute symbolic link in the output directory for each output file. `overwrite` -: When `true` any existing file in the specified folder will be overwritten (default: `true`). +: When `true` any existing file in the specified folder will be overwritten (default: `'standard'`). The following options are available: + + `false` + : Never overwrite existing files. + + `true` + : Always overwrite existing files. + + `'deep'` + : Overwrite existing files when the file content is different. + + `'lenient'` + : Overwrite existing files when the file size is different. + + `'standard'` + : Overwrite existing files when the file size or last modified timestamp is different. `path` : Specify the publish path relative to the output directory (default: the target name). Can only be specified within a target definition. diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index 6c7aed3735..0e52fd50ef 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -46,7 +46,11 @@ import nextflow.extension.FilesEx import nextflow.file.FileHelper import nextflow.file.TagAwareFile import nextflow.fusion.FusionHelper +import nextflow.util.CacheHelper import nextflow.util.PathTrie + +import static nextflow.util.CacheHelper.HashMode + /** * Implements the {@code publishDir} directory. It create links or copies the output * files of a given task to a user specified directory. @@ -73,9 +77,9 @@ class PublishDir { Path path /** - * Whenever overwrite existing files + * Whether to overwrite existing files */ - Boolean overwrite + def /* Boolean | String */ overwrite = 'standard' /** * The publish {@link Mode} @@ -199,7 +203,7 @@ class PublishDir { result.pattern = params.pattern if( params.overwrite != null ) - result.overwrite = Boolean.parseBoolean(params.overwrite.toString()) + result.overwrite = params.overwrite if( params.saveAs ) result.saveAs = (Closure) params.saveAs @@ -427,7 +431,7 @@ class PublishDir { if( !sameRealPath && checkSourcePathConflicts(destination)) return - if( !sameRealPath && overwrite ) { + if( !sameRealPath && shouldOverwrite(source, destination) ) { FileHelper.deletePath(destination) processFileImpl(source, destination) } @@ -511,6 +515,17 @@ class PublishDir { return !mode || mode == Mode.SYMLINK || mode == Mode.RELLINK } + protected boolean shouldOverwrite(Path source, Path target) { + if( overwrite instanceof Boolean ) + return overwrite + + final hashMode = HashMode.of(overwrite) ?: HashMode.DEFAULT() + final sourceHash = CacheHelper.hasher(CacheHelper.defaultHasher().newHasher(), source, hashMode, source.parent).hash() + final targetHash = CacheHelper.hasher(CacheHelper.defaultHasher().newHasher(), target, hashMode, target.parent).hash() + log.trace "comparing source and target with mode=${overwrite}, source=${sourceHash}, target=${targetHash}, should overwrite=${sourceHash != targetHash}" + return sourceHash != targetHash + } + protected void processFileImpl( Path source, Path destination ) { log.trace "publishing file: $source -[$mode]-> $destination" diff --git a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy index b0cddbb5ec..c84b9d3a9a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy +++ 
b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy @@ -97,6 +97,10 @@ class PublishDsl { setDefault('overwrite', value) } + void overwrite(String value) { + setDefault('overwrite', value) + } + void storageClass(String value) { setDefault('storageClass', value) } @@ -177,6 +181,10 @@ class PublishDsl { setOption('overwrite', value) } + void overwrite(String value) { + setOption('overwrite', value) + } + void path(String value) { setOption('path', value) } diff --git a/modules/nf-commons/src/main/nextflow/util/CacheHelper.java b/modules/nf-commons/src/main/nextflow/util/CacheHelper.java index 95bf2f5683..1614bf83ba 100644 --- a/modules/nf-commons/src/main/nextflow/util/CacheHelper.java +++ b/modules/nf-commons/src/main/nextflow/util/CacheHelper.java @@ -94,11 +94,11 @@ public static HashMode of( Object obj ) { private static final Logger log = LoggerFactory.getLogger(CacheHelper.class); - private static HashFunction DEFAULT_HASHING = Hashing.murmur3_128(); + private static final HashFunction DEFAULT_HASHING = Hashing.murmur3_128(); - private static int HASH_BITS = DEFAULT_HASHING.bits(); + private static final int HASH_BITS = DEFAULT_HASHING.bits(); - private static int HASH_BYTES = HASH_BITS / 8; + private static final int HASH_BYTES = HASH_BITS / 8; private static final Map FIRST_ONLY; @@ -124,6 +124,10 @@ public static Hasher hasher( HashFunction function, Object value, HashMode mode } public static Hasher hasher( Hasher hasher, Object value, HashMode mode ) { + return hasher( hasher, value, mode, null ); + } + + public static Hasher hasher( Hasher hasher, Object value, HashMode mode, Path basePath ) { if( value == null ) return hasher; @@ -195,10 +199,10 @@ public static Hasher hasher( Hasher hasher, Object value, HashMode mode ) { return CacheHelper.hasher(hasher, ((FileHolder) value).getSourceObj(), mode ); if( value instanceof Path ) - return hashFile(hasher, (Path)value, mode); + return hashFile(hasher, (Path)value, mode, basePath); if( value instanceof java.io.File ) - return hashFile(hasher, (java.io.File)value, mode); + return hashFile(hasher, (java.io.File)value, mode, basePath); if( value instanceof UUID ) { UUID uuid = (UUID)value; @@ -235,8 +239,8 @@ public static Hasher hasher( Hasher hasher, Object value, HashMode mode ) { * (full name, size and last update timestamp) * @return The updated {@code Hasher} object */ - static private Hasher hashFile( Hasher hasher, java.io.File file, HashMode mode ) { - return hashFile(hasher, file.toPath(), mode); + static private Hasher hashFile( Hasher hasher, java.io.File file, HashMode mode, Path basePath ) { + return hashFile(hasher, file.toPath(), mode, basePath); } /** @@ -249,7 +253,7 @@ static private Hasher hashFile( Hasher hasher, java.io.File file, HashMode mode * (full name, size and last update timestamp) * @return The updated {@code Hasher} object */ - static private Hasher hashFile( Hasher hasher, Path path, HashMode mode ) { + static private Hasher hashFile( Hasher hasher, Path path, HashMode mode, Path basePath ) { BasicFileAttributes attrs=null; try { attrs = Files.readAttributes(path, BasicFileAttributes.class); @@ -290,7 +294,7 @@ static private Hasher hashFile( Hasher hasher, Path path, HashMode mode ) { if( mode==HashMode.SHA256 && attrs!=null && attrs.isRegularFile() ) return hashFileSha256(hasher, path, null); // default - return hashFileMetadata(hasher, path, attrs, mode); + return hashFileMetadata(hasher, path, attrs, mode, basePath); } @@ -379,9 +383,13 @@ static private Hasher hashFileAsset( 
Hasher hasher, Path path ) { * @param file file The {@code Path} object to hash * @return The updated {@code Hasher} object */ - static private Hasher hashFileMetadata( Hasher hasher, Path file, BasicFileAttributes attrs, HashMode mode ) { + static private Hasher hashFileMetadata( Hasher hasher, Path file, BasicFileAttributes attrs, HashMode mode, Path basePath ) { + + String filename = basePath != null && file.startsWith(basePath) + ? basePath.relativize(file).toString() + : file.toAbsolutePath().toString(); - hasher = hasher.putUnencodedChars( file.toAbsolutePath().toString() ); + hasher = hasher.putUnencodedChars( filename ); if( attrs != null ) { hasher = hasher.putLong(attrs.size()); if( attrs.lastModifiedTime() != null && mode != HashMode.LENIENT ) { From 02745f7f64dd66ff088dd883c005be1e1176594f Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 17 Apr 2024 09:15:36 -0500 Subject: [PATCH 32/47] Factor out HashBuilder from CacheHelper Signed-off-by: Ben Sherman --- .../nextflow/processor/PublishDir.groovy | 6 +- .../nextflow/processor/TaskProcessor.groovy | 3 +- .../src/main/nextflow/util/CacheHelper.java | 410 +-------------- .../src/main/nextflow/util/HashBuilder.java | 468 ++++++++++++++++++ .../test/nextflow/util/CacheHelperTest.groovy | 4 +- 5 files changed, 477 insertions(+), 414 deletions(-) create mode 100644 modules/nf-commons/src/main/nextflow/util/HashBuilder.java diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index 0e52fd50ef..0892a5b3a7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -46,7 +46,7 @@ import nextflow.extension.FilesEx import nextflow.file.FileHelper import nextflow.file.TagAwareFile import nextflow.fusion.FusionHelper -import nextflow.util.CacheHelper +import nextflow.util.HashBuilder import nextflow.util.PathTrie import static nextflow.util.CacheHelper.HashMode @@ -520,8 +520,8 @@ class PublishDir { return overwrite final hashMode = HashMode.of(overwrite) ?: HashMode.DEFAULT() - final sourceHash = CacheHelper.hasher(CacheHelper.defaultHasher().newHasher(), source, hashMode, source.parent).hash() - final targetHash = CacheHelper.hasher(CacheHelper.defaultHasher().newHasher(), target, hashMode, target.parent).hash() + final sourceHash = new HashBuilder().withMode(hashMode).withBasePath(source.parent).with(source).build() + final targetHash = new HashBuilder().withMode(hashMode).withBasePath(target.parent).with(target).build() log.trace "comparing source and target with mode=${overwrite}, source=${sourceHash}, target=${targetHash}, should overwrite=${sourceHash != targetHash}" return sourceHash != targetHash } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index ab96b093db..1976553b0d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -109,6 +109,7 @@ import nextflow.util.ArrayBag import nextflow.util.BlankSeparatedList import nextflow.util.CacheHelper import nextflow.util.Escape +import nextflow.util.HashBuilder import nextflow.util.LockManager import nextflow.util.LoggerHelper import nextflow.util.TestOnly @@ -796,7 +797,7 @@ class TaskProcessor { int tries = task.failCount +1 while( true ) { - hash = 
CacheHelper.defaultHasher().newHasher().putBytes(hash.asBytes()).putInt(tries).hash() + hash = HashBuilder.defaultHasher().putBytes(hash.asBytes()).putInt(tries).hash() Path resumeDir = null boolean exists = false diff --git a/modules/nf-commons/src/main/nextflow/util/CacheHelper.java b/modules/nf-commons/src/main/nextflow/util/CacheHelper.java index 1614bf83ba..b06c1e3203 100644 --- a/modules/nf-commons/src/main/nextflow/util/CacheHelper.java +++ b/modules/nf-commons/src/main/nextflow/util/CacheHelper.java @@ -16,40 +16,10 @@ package nextflow.util; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.file.FileVisitResult; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.ProviderMismatchException; -import java.nio.file.SimpleFileVisitor; -import java.nio.file.attribute.BasicFileAttributes; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.ExecutionException; - -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.hash.Funnels; -import com.google.common.hash.HashCode; import com.google.common.hash.HashFunction; import com.google.common.hash.Hasher; -import com.google.common.hash.Hashing; -import com.google.common.io.ByteStreams; -import nextflow.Global; -import nextflow.ISession; -import nextflow.extension.Bolts; -import nextflow.extension.FilesEx; -import nextflow.file.FileHolder; -import nextflow.io.SerializableMarker; -import org.slf4j.Logger; import org.slf4j.LoggerFactory; - /** * Provide helper method to handle caching * @@ -92,31 +62,12 @@ public static HashMode of( Object obj ) { } } - private static final Logger log = LoggerFactory.getLogger(CacheHelper.class); - - private static final HashFunction DEFAULT_HASHING = Hashing.murmur3_128(); - - private static final int HASH_BITS = DEFAULT_HASHING.bits(); - - private static final int HASH_BYTES = HASH_BITS / 8; - - private static final Map FIRST_ONLY; - - static { - FIRST_ONLY = new HashMap<>(1); - FIRST_ONLY.put("firstOnly", Boolean.TRUE); - } - - public static HashFunction defaultHasher() { - return DEFAULT_HASHING; - } - public static Hasher hasher( Object value ) { return hasher(value, HashMode.STANDARD); } public static Hasher hasher( Object value, HashMode mode ) { - return hasher( DEFAULT_HASHING, value, mode ); + return hasher( HashBuilder.defaultHasher(), value, mode ); } public static Hasher hasher( HashFunction function, Object value, HashMode mode ) { @@ -124,364 +75,7 @@ public static Hasher hasher( HashFunction function, Object value, HashMode mode } public static Hasher hasher( Hasher hasher, Object value, HashMode mode ) { - return hasher( hasher, value, mode, null ); - } - - public static Hasher hasher( Hasher hasher, Object value, HashMode mode, Path basePath ) { - - if( value == null ) - return hasher; - - if( value instanceof Boolean ) - return hasher.putBoolean((Boolean) value); - - if( value instanceof Short ) - return hasher.putShort((Short) value); - - if( value instanceof Integer) - return hasher.putInt((Integer) value); - - if( value instanceof Long ) - return hasher.putLong((Long) value); - - if( value instanceof Float ) - return hasher.putFloat((Float) value); - - if( value instanceof Double ) - return hasher.putDouble( (Double)value ); - - if( value instanceof Byte ) - return hasher.putByte( (Byte)value ); - - if( value instanceof Number ) - 
// reduce all other number types (BigInteger, BigDecimal, AtomicXxx, etc) to string equivalent - return hasher.putUnencodedChars(value.toString()); - - if( value instanceof Character ) - return hasher.putChar( (Character)value ); - - if( value instanceof CharSequence ) - return hasher.putUnencodedChars( (CharSequence)value ); - - if( value instanceof byte[] ) - return hasher.putBytes( (byte[])value ); - - if( value instanceof Object[]) { - for( Object item: ((Object[])value) ) - hasher = CacheHelper.hasher( hasher, item, mode ); - return hasher; - } - - if( value instanceof Map ) { - // note: should map be order invariant as Set ? - for( Object item : ((Map)value).values() ) - hasher = CacheHelper.hasher( hasher, item, mode ); - return hasher; - } - - if( value instanceof Map.Entry ) { - Map.Entry entry = (Map.Entry)value; - hasher = CacheHelper.hasher( hasher, entry.getKey(), mode ); - hasher = CacheHelper.hasher( hasher, entry.getValue(), mode ); - return hasher; - } - - if( value instanceof Bag || value instanceof Set ) - return hashUnorderedCollection(hasher, (Collection) value, mode); - - if( value instanceof Collection) { - for( Object item: ((Collection)value) ) - hasher = CacheHelper.hasher( hasher, item, mode ); - return hasher; - } - - if( value instanceof FileHolder ) - return CacheHelper.hasher(hasher, ((FileHolder) value).getSourceObj(), mode ); - - if( value instanceof Path ) - return hashFile(hasher, (Path)value, mode, basePath); - - if( value instanceof java.io.File ) - return hashFile(hasher, (java.io.File)value, mode, basePath); - - if( value instanceof UUID ) { - UUID uuid = (UUID)value; - return hasher.putLong(uuid.getMostSignificantBits()).putLong(uuid.getLeastSignificantBits()); - } - - if( value instanceof VersionNumber ) { - return hasher.putInt( value.hashCode() ); - } - - if( value instanceof SerializableMarker) { - return hasher.putInt( value.hashCode() ); - } - - if( value instanceof CacheFunnel ) { - return ((CacheFunnel) value).funnel(hasher,mode); - } - - if( value instanceof Enum ) { - return hasher.putUnencodedChars( value.getClass().getName() + "." 
+ value ); - } - - Bolts.debug1(log, FIRST_ONLY, "[WARN] Unknown hashing type: "+value.getClass()); - return hasher.putInt( value.hashCode() ); - } - - /** - * Hashes the specified file - * - * @param hasher The current {@code Hasher} object - * @param file The {@code File} object to hash - * @param mode When {@code mode} is equals to the string {@code deep} is used the file content - * in order to create the hash key for this file, otherwise just the file metadata information - * (full name, size and last update timestamp) - * @return The updated {@code Hasher} object - */ - static private Hasher hashFile( Hasher hasher, java.io.File file, HashMode mode, Path basePath ) { - return hashFile(hasher, file.toPath(), mode, basePath); - } - - /** - * Hashes the specified file - * - * @param hasher The current {@code Hasher} object - * @param path The {@code Path} object to hash - * @param mode When {@code mode} is equals to the string {@code deep} is used the file content - * in order to create the hash key for this file, otherwise just the file metadata information - * (full name, size and last update timestamp) - * @return The updated {@code Hasher} object - */ - static private Hasher hashFile( Hasher hasher, Path path, HashMode mode, Path basePath ) { - BasicFileAttributes attrs=null; - try { - attrs = Files.readAttributes(path, BasicFileAttributes.class); - } - catch(IOException e) { - log.debug("Unable to get file attributes file: {} -- Cause: {}", FilesEx.toUriString(path), e.toString()); - } - catch(ProviderMismatchException e) { - // see https://github.com/nextflow-io/nextflow/pull/1382 - log.warn("File system is unable to get file attributes file: {} -- Cause: {}", FilesEx.toUriString(path), e.toString()); - } - catch(Exception e) { - log.warn("Unable to get file attributes file: {} -- Cause: {}", FilesEx.toUriString(path), e.toString()); - } - - if( (mode==HashMode.STANDARD || mode==HashMode.LENIENT) && isAssetFile(path) ) { - if( attrs==null ) { - // when file attributes are not avail or it's a directory - // hash the file using the file name path and the repository - log.warn("Unable to fetch attribute for file: {} - Hash is inferred from Git repository commit Id", FilesEx.toUriString(path)); - return hashFileAsset(hasher, path); - } - final Path base = Global.getSession().getBaseDir(); - if( attrs.isDirectory() ) { - // hash all the directory content - return hashDirSha256(hasher, path, base); - } - else { - // hash the content being an asset file - // (i.e. 
included in the project repository) it's expected to small file - // which makes the content hashing doable - return hashFileSha256(hasher, path, base); - } - } - - if( mode==HashMode.DEEP && attrs!=null && attrs.isRegularFile() ) - return hashFileContent(hasher, path); - if( mode==HashMode.SHA256 && attrs!=null && attrs.isRegularFile() ) - return hashFileSha256(hasher, path, null); - // default - return hashFileMetadata(hasher, path, attrs, mode, basePath); - } - - - static private LoadingCache sha256Cache = CacheBuilder - .newBuilder() - .maximumSize(10_000) - .build(new CacheLoader() { - @Override - public String load(Path key) throws Exception { - return hashFileSha256Impl0(key); - } - }); - - static protected Hasher hashFileSha256( Hasher hasher, Path path, Path base ) { - try { - log.trace("Hash sha-256 file content path={} - base={}", path, base); - // the file relative base - if( base!=null ) - hasher.putUnencodedChars(base.relativize(path).toString()); - // file content hash - String sha256 = sha256Cache.get(path); - hasher.putUnencodedChars(sha256); - } - catch (ExecutionException t) { - Throwable err = t.getCause()!=null ? t.getCause() : t; - String msg = err.getMessage()!=null ? err.getMessage() : err.toString(); - log.warn("Unable to compute sha-256 hashing for file: {} - Cause: {}", FilesEx.toUriString(path), msg); - } - return hasher; - } - - static protected Hasher hashDirSha256( Hasher hasher, Path dir, Path base ) { - try { - Files.walkFileTree(dir, new SimpleFileVisitor() { - public FileVisitResult visitFile(Path path, BasicFileAttributes attrs) throws IOException { - log.trace("Hash sha-256 dir content [FILE] path={} - base={}", path, base); - try { - // the file relative base - if( base!=null ) - hasher.putUnencodedChars(base.relativize(path).toString()); - // the file content sha-256 checksum - String sha256 = sha256Cache.get(path); - hasher.putUnencodedChars(sha256); - return FileVisitResult.CONTINUE; - } - catch (ExecutionException t) { - throw new IOException(t); - } - } - - public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs) { - log.trace("Hash sha-256 dir content [DIR] path={} - base={}", path, base); - // the file relative base - if( base!=null ) - hasher.putUnencodedChars(base.relativize(path).toString()); - hasher.putUnencodedChars(base.relativize(path).toString()); - return FileVisitResult.CONTINUE; - } - }); - } - catch (IOException t) { - Throwable err = t.getCause()!=null ? t.getCause() : t; - String msg = err.getMessage()!=null ? 
err.getMessage() : err.toString(); - log.warn("Unable to compute sha-256 hashing for directory: {} - Cause: {}", FilesEx.toUriString(dir), msg); - } - return hasher; - } - - static protected String hashFileSha256Impl0(Path path) throws IOException { - log.debug("Hash asset file sha-256: {}", path); - Hasher hasher = Hashing.sha256().newHasher(); - ByteStreams.copy(Files.newInputStream(path), Funnels.asOutputStream(hasher)); - return hasher.hash().toString(); - } - - static private Hasher hashFileAsset( Hasher hasher, Path path ) { - log.debug("Hash asset file: {}", path); - hasher.putUnencodedChars( Global.getSession().getCommitId() ); - return hasher; - } - - /** - * Hashes the file by using the metadata information: full path string, size and last update timestamp - * - * @param hasher The current {@code Hasher} object - * @param file file The {@code Path} object to hash - * @return The updated {@code Hasher} object - */ - static private Hasher hashFileMetadata( Hasher hasher, Path file, BasicFileAttributes attrs, HashMode mode, Path basePath ) { - - String filename = basePath != null && file.startsWith(basePath) - ? basePath.relativize(file).toString() - : file.toAbsolutePath().toString(); - - hasher = hasher.putUnencodedChars( filename ); - if( attrs != null ) { - hasher = hasher.putLong(attrs.size()); - if( attrs.lastModifiedTime() != null && mode != HashMode.LENIENT ) { - hasher = hasher.putLong( attrs.lastModifiedTime().toMillis() ); - } - } - - if( log.isTraceEnabled() ) { - log.trace("Hashing file meta: path={}; size={}, lastModified={}, mode={}", - file.toAbsolutePath().toString(), - attrs!=null ? attrs.size() : "--", - attrs!=null && attrs.lastModifiedTime() != null && mode != HashMode.LENIENT ? attrs.lastModifiedTime().toMillis() : "--", - mode - ); - } - return hasher; - } - - - /** - * Hashes the file by reading file content - * - * @param hasher The current {@code Hasher} object - * @param path file The {@code Path} object to hash - * @return The updated {@code Hasher} object - */ - - static private Hasher hashFileContent( Hasher hasher, Path path ) { - - OutputStream output = Funnels.asOutputStream(hasher); - try { - Files.copy(path, output); - } - catch( IOException e ) { - throw new IllegalStateException("Unable to hash content: " + FilesEx.toUriString(path), e); - } - finally { - FilesEx.closeQuietly(output); - } - - return hasher; - } - - static HashCode hashContent( Path file ) { - return hashContent(file, null); - } - - static HashCode hashContent( Path file, HashFunction function ) { - - if( function == null ) - function = DEFAULT_HASHING; - - Hasher hasher = function.newHasher(); - return hashFileContent(hasher, file).hash(); - } - - static private Hasher hashUnorderedCollection(Hasher hasher, Collection collection, HashMode mode) { - - byte[] resultBytes = new byte[HASH_BYTES]; - for (Object item : collection) { - byte[] nextBytes = CacheHelper.hasher(item,mode).hash().asBytes(); - if( nextBytes.length != resultBytes.length ) - throw new IllegalStateException("All hash codes must have the same bit length"); - - for (int i = 0; i < nextBytes.length; i++) { - resultBytes[i] += nextBytes[i]; - } - } - - return hasher.putBytes(resultBytes); - - } - - /** - * Check if the argument is an asset file i.e. 
a file that makes part of the - * pipeline Git repository - * - * @param path - * @return - */ - static protected boolean isAssetFile(Path path) { - final ISession session = Global.getSession(); - if( session==null ) - return false; - // if the commit ID is null the current run is not launched from a repo - if( session.getCommitId()==null ) - return false; - // if the file belong to different file system, cannot be a file belonging to the repo - if( session.getBaseDir().getFileSystem()!=path.getFileSystem() ) - return false; - // if the file is in the same directory as the base dir it's a asset by definition - return path.startsWith(session.getBaseDir()); + return HashBuilder.hasher(hasher, value, mode); } } diff --git a/modules/nf-commons/src/main/nextflow/util/HashBuilder.java b/modules/nf-commons/src/main/nextflow/util/HashBuilder.java new file mode 100644 index 0000000000..235e211f40 --- /dev/null +++ b/modules/nf-commons/src/main/nextflow/util/HashBuilder.java @@ -0,0 +1,468 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.util; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.ProviderMismatchException; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ExecutionException; + +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.google.common.hash.Funnels; +import com.google.common.hash.HashCode; +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hasher; +import com.google.common.hash.Hashing; +import com.google.common.io.ByteStreams; +import nextflow.Global; +import nextflow.ISession; +import nextflow.extension.Bolts; +import nextflow.extension.FilesEx; +import nextflow.file.FileHolder; +import nextflow.io.SerializableMarker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static nextflow.util.CacheHelper.HashMode; + + +/** + * Implements the hashing of objects + * + * @author Paolo Di Tommaso + */ +public class HashBuilder { + + private static final Logger log = LoggerFactory.getLogger(HashBuilder.class); + + private static final HashFunction DEFAULT_HASHING = Hashing.murmur3_128(); + + private static final int HASH_BITS = DEFAULT_HASHING.bits(); + + private static final int HASH_BYTES = HASH_BITS / 8; + + private static final Map FIRST_ONLY; + + static { + FIRST_ONLY = new HashMap<>(1); + FIRST_ONLY.put("firstOnly", Boolean.TRUE); + } + + public static Hasher defaultHasher() { + return HashBuilder.DEFAULT_HASHING.newHasher(); + } + + private Hasher hasher = defaultHasher(); + + private HashMode mode = HashMode.STANDARD; + + private Path basePath; 
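+
+    // Example usage (illustrative): hash an arbitrary value with the default hasher
+    //   HashCode hash = new HashBuilder().withMode(HashMode.STANDARD).with(value).build();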
+ + public HashBuilder() {} + + public HashBuilder withHasher(Hasher hasher) { + this.hasher = hasher; + return this; + } + + public HashBuilder withMode(HashMode mode) { + this.mode = mode; + return this; + } + + public HashBuilder withBasePath(Path basePath) { + this.basePath = basePath; + return this; + } + + public HashBuilder with(Object value) { + + if( value == null ) + return this; + + else if( value instanceof Boolean ) + hasher.putBoolean((Boolean) value); + + else if( value instanceof Short ) + hasher.putShort((Short) value); + + else if( value instanceof Integer) + hasher.putInt((Integer) value); + + else if( value instanceof Long ) + hasher.putLong((Long) value); + + else if( value instanceof Float ) + hasher.putFloat((Float) value); + + else if( value instanceof Double ) + hasher.putDouble( (Double)value ); + + else if( value instanceof Byte ) + hasher.putByte( (Byte)value ); + + else if( value instanceof Number ) + // reduce all other number types (BigInteger, BigDecimal, AtomicXxx, etc) to string equivalent + hasher.putUnencodedChars(value.toString()); + + else if( value instanceof Character ) + hasher.putChar( (Character)value ); + + else if( value instanceof CharSequence ) + hasher.putUnencodedChars( (CharSequence)value ); + + else if( value instanceof byte[] ) + hasher.putBytes( (byte[])value ); + + else if( value instanceof Object[]) + for( Object item : ((Object[])value) ) + with(item); + + // note: should map be order invariant as Set ? + else if( value instanceof Map ) + for( Object item : ((Map)value).values() ) + with(item); + + else if( value instanceof Map.Entry ) { + Map.Entry entry = (Map.Entry)value; + with(entry.getKey()); + with(entry.getValue()); + } + + else if( value instanceof Bag || value instanceof Set ) + hashUnorderedCollection(hasher, (Collection) value, mode); + + else if( value instanceof Collection) + for( Object item : ((Collection)value) ) + with(item); + + else if( value instanceof FileHolder ) + with(((FileHolder) value).getSourceObj()); + + else if( value instanceof Path ) + hashFile(hasher, (Path)value, mode, basePath); + + else if( value instanceof java.io.File ) + hashFile(hasher, (java.io.File)value, mode, basePath); + + else if( value instanceof UUID ) { + UUID uuid = (UUID)value; + hasher.putLong(uuid.getMostSignificantBits()).putLong(uuid.getLeastSignificantBits()); + } + + else if( value instanceof VersionNumber ) + hasher.putInt( value.hashCode() ); + + else if( value instanceof SerializableMarker) + hasher.putInt( value.hashCode() ); + + else if( value instanceof CacheFunnel ) + ((CacheFunnel)value).funnel(hasher, mode); + + else if( value instanceof Enum ) + hasher.putUnencodedChars( value.getClass().getName() + "." 
+ value ); + + else { + Bolts.debug1(log, FIRST_ONLY, "[WARN] Unknown hashing type: " + value.getClass()); + hasher.putInt( value.hashCode() ); + } + + return this; + } + + public Hasher getHasher() { + return hasher; + } + + public HashCode build() { + return hasher.hash(); + } + + public static Hasher hasher( Hasher hasher, Object value, HashMode mode ) { + + return new HashBuilder() + .withHasher(hasher) + .withMode(mode) + .with(value) + .getHasher(); + } + + /** + * Hashes the specified file + * + * @param hasher The current {@code Hasher} object + * @param file The {@code File} object to hash + * @param mode When {@code mode} is equals to the string {@code deep} is used the file content + * in order to create the hash key for this file, otherwise just the file metadata information + * (full name, size and last update timestamp) + * @return The updated {@code Hasher} object + */ + static private Hasher hashFile( Hasher hasher, java.io.File file, HashMode mode, Path basePath ) { + return hashFile(hasher, file.toPath(), mode, basePath); + } + + /** + * Hashes the specified file + * + * @param hasher The current {@code Hasher} object + * @param path The {@code Path} object to hash + * @param mode When {@code mode} is equals to the string {@code deep} is used the file content + * in order to create the hash key for this file, otherwise just the file metadata information + * (full name, size and last update timestamp) + * @return The updated {@code Hasher} object + */ + static private Hasher hashFile( Hasher hasher, Path path, HashMode mode, Path basePath ) { + BasicFileAttributes attrs=null; + try { + attrs = Files.readAttributes(path, BasicFileAttributes.class); + } + catch(IOException e) { + log.debug("Unable to get file attributes file: {} -- Cause: {}", FilesEx.toUriString(path), e.toString()); + } + catch(ProviderMismatchException e) { + // see https://github.com/nextflow-io/nextflow/pull/1382 + log.warn("File system is unable to get file attributes file: {} -- Cause: {}", FilesEx.toUriString(path), e.toString()); + } + catch(Exception e) { + log.warn("Unable to get file attributes file: {} -- Cause: {}", FilesEx.toUriString(path), e.toString()); + } + + if( (mode==HashMode.STANDARD || mode==HashMode.LENIENT) && isAssetFile(path) ) { + if( attrs==null ) { + // when file attributes are not avail or it's a directory + // hash the file using the file name path and the repository + log.warn("Unable to fetch attribute for file: {} - Hash is inferred from Git repository commit Id", FilesEx.toUriString(path)); + return hashFileAsset(hasher, path); + } + final Path base = Global.getSession().getBaseDir(); + if( attrs.isDirectory() ) { + // hash all the directory content + return hashDirSha256(hasher, path, base); + } + else { + // hash the content being an asset file + // (i.e. 
included in the project repository) it's expected to small file + // which makes the content hashing doable + return hashFileSha256(hasher, path, base); + } + } + + if( mode==HashMode.DEEP && attrs!=null && attrs.isRegularFile() ) + return hashFileContent(hasher, path); + if( mode==HashMode.SHA256 && attrs!=null && attrs.isRegularFile() ) + return hashFileSha256(hasher, path, null); + // default + return hashFileMetadata(hasher, path, attrs, mode, basePath); + } + + + static private LoadingCache sha256Cache = CacheBuilder + .newBuilder() + .maximumSize(10_000) + .build(new CacheLoader() { + @Override + public String load(Path key) throws Exception { + return hashFileSha256Impl0(key); + } + }); + + static protected Hasher hashFileSha256( Hasher hasher, Path path, Path base ) { + try { + log.trace("Hash sha-256 file content path={} - base={}", path, base); + // the file relative base + if( base!=null ) + hasher.putUnencodedChars(base.relativize(path).toString()); + // file content hash + String sha256 = sha256Cache.get(path); + hasher.putUnencodedChars(sha256); + } + catch (ExecutionException t) { + Throwable err = t.getCause()!=null ? t.getCause() : t; + String msg = err.getMessage()!=null ? err.getMessage() : err.toString(); + log.warn("Unable to compute sha-256 hashing for file: {} - Cause: {}", FilesEx.toUriString(path), msg); + } + return hasher; + } + + static protected Hasher hashDirSha256( Hasher hasher, Path dir, Path base ) { + try { + Files.walkFileTree(dir, new SimpleFileVisitor() { + public FileVisitResult visitFile(Path path, BasicFileAttributes attrs) throws IOException { + log.trace("Hash sha-256 dir content [FILE] path={} - base={}", path, base); + try { + // the file relative base + if( base!=null ) + hasher.putUnencodedChars(base.relativize(path).toString()); + // the file content sha-256 checksum + String sha256 = sha256Cache.get(path); + hasher.putUnencodedChars(sha256); + return FileVisitResult.CONTINUE; + } + catch (ExecutionException t) { + throw new IOException(t); + } + } + + public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs) { + log.trace("Hash sha-256 dir content [DIR] path={} - base={}", path, base); + // the file relative base + if( base!=null ) + hasher.putUnencodedChars(base.relativize(path).toString()); + hasher.putUnencodedChars(base.relativize(path).toString()); + return FileVisitResult.CONTINUE; + } + }); + } + catch (IOException t) { + Throwable err = t.getCause()!=null ? t.getCause() : t; + String msg = err.getMessage()!=null ? 
err.getMessage() : err.toString(); + log.warn("Unable to compute sha-256 hashing for directory: {} - Cause: {}", FilesEx.toUriString(dir), msg); + } + return hasher; + } + + static protected String hashFileSha256Impl0(Path path) throws IOException { + log.debug("Hash asset file sha-256: {}", path); + Hasher hasher = Hashing.sha256().newHasher(); + ByteStreams.copy(Files.newInputStream(path), Funnels.asOutputStream(hasher)); + return hasher.hash().toString(); + } + + static private Hasher hashFileAsset( Hasher hasher, Path path ) { + log.debug("Hash asset file: {}", path); + hasher.putUnencodedChars( Global.getSession().getCommitId() ); + return hasher; + } + + /** + * Hashes the file by using the metadata information: full path string, size and last update timestamp + * + * @param hasher The current {@code Hasher} object + * @param file file The {@code Path} object to hash + * @return The updated {@code Hasher} object + */ + static private Hasher hashFileMetadata( Hasher hasher, Path file, BasicFileAttributes attrs, HashMode mode, Path basePath ) { + + String filename = basePath != null && file.startsWith(basePath) + ? basePath.relativize(file).toString() + : file.toAbsolutePath().toString(); + + hasher = hasher.putUnencodedChars( filename ); + if( attrs != null ) { + hasher = hasher.putLong(attrs.size()); + if( attrs.lastModifiedTime() != null && mode != HashMode.LENIENT ) { + hasher = hasher.putLong( attrs.lastModifiedTime().toMillis() ); + } + } + + if( log.isTraceEnabled() ) { + log.trace("Hashing file meta: path={}; size={}, lastModified={}, mode={}", + file.toAbsolutePath().toString(), + attrs!=null ? attrs.size() : "--", + attrs!=null && attrs.lastModifiedTime() != null && mode != HashMode.LENIENT ? attrs.lastModifiedTime().toMillis() : "--", + mode + ); + } + return hasher; + } + + /** + * Hashes the file by reading file content + * + * @param hasher The current {@code Hasher} object + * @param path file The {@code Path} object to hash + * @return The updated {@code Hasher} object + */ + static private Hasher hashFileContent( Hasher hasher, Path path ) { + + OutputStream output = Funnels.asOutputStream(hasher); + try { + Files.copy(path, output); + } + catch( IOException e ) { + throw new IllegalStateException("Unable to hash content: " + FilesEx.toUriString(path), e); + } + finally { + FilesEx.closeQuietly(output); + } + + return hasher; + } + + static HashCode hashContent( Path file ) { + return hashContent(file, null); + } + + static HashCode hashContent( Path file, HashFunction function ) { + + if( function == null ) + function = DEFAULT_HASHING; + + Hasher hasher = function.newHasher(); + return hashFileContent(hasher, file).hash(); + } + + static private Hasher hashUnorderedCollection(Hasher hasher, Collection collection, HashMode mode) { + + byte[] resultBytes = new byte[HASH_BYTES]; + for (Object item : collection) { + byte[] nextBytes = HashBuilder.hasher(defaultHasher(), item, mode).hash().asBytes(); + if( nextBytes.length != resultBytes.length ) + throw new IllegalStateException("All hash codes must have the same bit length"); + + for (int i = 0; i < nextBytes.length; i++) { + resultBytes[i] += nextBytes[i]; + } + } + + return hasher.putBytes(resultBytes); + } + + /** + * Check if the argument is an asset file i.e. 
a file that makes part of the + * pipeline Git repository + * + * @param path + * @return + */ + static protected boolean isAssetFile(Path path) { + final ISession session = Global.getSession(); + if( session==null ) + return false; + // if the commit ID is null the current run is not launched from a repo + if( session.getCommitId()==null ) + return false; + // if the file belong to different file system, cannot be a file belonging to the repo + if( session.getBaseDir().getFileSystem()!=path.getFileSystem() ) + return false; + // if the file is in the same directory as the base dir it's a asset by definition + return path.startsWith(session.getBaseDir()); + } + +} diff --git a/modules/nf-commons/src/test/nextflow/util/CacheHelperTest.groovy b/modules/nf-commons/src/test/nextflow/util/CacheHelperTest.groovy index ea1e2dc479..a599297c20 100644 --- a/modules/nf-commons/src/test/nextflow/util/CacheHelperTest.groovy +++ b/modules/nf-commons/src/test/nextflow/util/CacheHelperTest.groovy @@ -302,9 +302,9 @@ class CacheHelperTest extends Specification { folder.resolve('dir2/xxx/yyy/bar1').text = "I'm bar within yyy" when: - def hash1 = CacheHelper.hashDirSha256(CacheHelper.defaultHasher().newHasher(), folder.resolve('dir1'), folder.resolve('dir1')) + def hash1 = CacheHelper.hashDirSha256(CacheHelper.defaultHasher(), folder.resolve('dir1'), folder.resolve('dir1')) and: - def hash2 = CacheHelper.hashDirSha256(CacheHelper.defaultHasher().newHasher(), folder.resolve('dir2'), folder.resolve('dir2')) + def hash2 = CacheHelper.hashDirSha256(CacheHelper.defaultHasher(), folder.resolve('dir2'), folder.resolve('dir2')) then: hash1.hash() == hash2.hash() From 0583b3d2ff8af056cd138a7cbd0bea8605880728 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 18 Apr 2024 16:05:57 -0500 Subject: [PATCH 33/47] Add index file definition Signed-off-by: Ben Sherman --- docs/workflow.md | 167 ++++++++++++------ .../nextflow/extension/PublishIndexOp.groovy | 74 ++++++++ .../groovy/nextflow/script/PublishDef.groovy | 84 ++++++++- .../groovy/nextflow/util/CsvWriter.groovy | 68 +++++++ .../src/main/nextflow/util/HashBuilder.java | 2 +- tests/publish-dsl.nf | 5 + 6 files changed, 336 insertions(+), 64 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy create mode 100644 modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy diff --git a/docs/workflow.md b/docs/workflow.md index 15591dad7d..878e3006fb 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -317,6 +317,82 @@ Each workflow invocation has its own scope. As a result, the same process can be The fully qualified process name can be used as a {ref}`process selector ` in a Nextflow configuration file, and it takes priority over the simple process name. ::: +## Special operators + +### Pipe `|` + +The `|` *pipe* operator can be used to compose Nextflow processes and operators. For example: + +```groovy +process foo { + input: + val data + + output: + val result + + exec: + result = "$data world" +} + +workflow { + channel.from('Hello','Hola','Ciao') | foo | map { it.toUpperCase() } | view +} +``` + +The above snippet defines a process named `foo` and invokes it with the `data` channel. The result is then piped to the {ref}`operator-map` operator, which converts each string to uppercase, and finally to the {ref}`operator-view` operator which prints it. 
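+
+For comparison, the piped form above is roughly equivalent to the following call chain (an illustrative sketch using the same `foo` process):
+
+```groovy
+workflow {
+    foo(channel.from('Hello','Hola','Ciao')).map { it.toUpperCase() }.view()
+}
+```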
+ +:::{tip} +Statements can also be split across multiple lines for better readability: + +```groovy +workflow { + channel.from('Hello','Hola','Ciao') + | foo + | map { it.toUpperCase() } + | view +} +``` +::: + +### And `&` + +The `&` *and* operator can be used to feed multiple processes with the same channel(s). For example: + +```groovy +process foo { + input: + val data + + output: + val result + + exec: + result = "$data world" +} + +process bar { + input: + val data + + output: + val result + + exec: + result = data.toUpperCase() +} + +workflow { + channel.from('Hello') + | map { it.reverse() } + | (foo & bar) + | mix + | view +} +``` + +In the above snippet, the initial channel is piped to the {ref}`operator-map` operator, which reverses the string value. Then, the result is passed to the processes `foo` and `bar`, which are executed in parallel. Each process outputs a channel, and the two channels are combined using the {ref}`operator-mix` operator. Finally, the result is printed using the {ref}`operator-view` operator. + (workflow-publish-def)= ## Publishing outputs @@ -536,78 +612,59 @@ Available options: tags FOO: 'hello', BAR: 'world' ``` -## Special operators +### Index files -### Pipe `|` +A publish target can create an index file of the values that were published. An index file is a useful way to save the metadata associated with files, and is more flexible than encoding metadata in the file path. Currently only CSV files are supported. -The `|` *pipe* operator can be used to compose Nextflow processes and operators. For example: +For example: ```groovy -process foo { - input: - val data - - output: - val result +workflow { + ch_foo = Channel.of( + [id: 1, name: 'foo 1'], + [id: 2, name: 'foo 2'], + [id: 3, name: 'foo 3'] + ) - exec: - result = "$data world" + publish: + ch_foo >> 'foo/' } -workflow { - channel.from('Hello','Hola','Ciao') | foo | map { it.toUpperCase() } | view +publish { + directory 'results' + + 'foo/' { + index 'index.csv' + } } ``` -The above snippet defines a process named `foo` and invokes it with the `data` channel. The result is then piped to the {ref}`operator-map` operator, which converts each string to uppercase, and finally to the {ref}`operator-view` operator which prints it. - -:::{tip} -Statements can also be split across multiple lines for better readability: +The above example will write the following CSV file to `results/foo/index.csv`: -```groovy -workflow { - channel.from('Hello','Hola','Ciao') - | foo - | map { it.toUpperCase() } - | view -} +```csv +"id","name" +"1","foo 1" +"2","foo 2" +"3","foo 3" ``` -::: -### And `&` - -The `&` *and* operator can be used to feed multiple processes with the same channel(s). For example: +You can customize the index file by specifying options in a block, for example: ```groovy -process foo { - input: - val data - - output: - val result - - exec: - result = "$data world" +index('index.csv') { + header ['name', 'extra_option'] + sep '\t' + mapper { val -> val + [extra_option: 'bar'] } } +``` -process bar { - input: - val data - - output: - val result +The following options are available: - exec: - result = data.toUpperCase() -} +`header` +: When `true`, the keys of the first record are used as the column names (default: `true`). Can also be a list of column names. -workflow { - channel.from('Hello') - | map { it.reverse() } - | (foo & bar) - | mix - | view -} -``` +`mapper` +: Closure which defines how to transform each published value into a CSV record. The closure should return a list or map. 
By default, no transformation is applied. -In the above snippet, the initial channel is piped to the {ref}`operator-map` operator, which reverses the string value. Then, the result is passed to the processes `foo` and `bar`, which are executed in parallel. Each process outputs a channel, and the two channels are combined using the {ref}`operator-mix` operator. Finally, the result is printed using the {ref}`operator-view` operator. +`sep` +: The character used to separate values (default: `','`). diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy new file mode 100644 index 0000000000..ccac33162c --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy @@ -0,0 +1,74 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.extension + +import java.nio.file.Path + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import groovyx.gpars.dataflow.DataflowReadChannel +import nextflow.util.CsvWriter +/** + * + * @author Ben Sherman + */ +@Slf4j +@CompileStatic +class PublishIndexOp { + + private DataflowReadChannel source + + private Path path + + private Closure mapper + + private /* boolean | List */ header = true + + private String sep = ',' + + private List records = [] + + PublishIndexOp(DataflowReadChannel source, Path path, Map opts) { + this.source = source + this.path = path + if( opts.mapper ) + this.mapper = opts.mapper as Closure + if( opts.header != null ) + this.header = opts.header + if( opts.sep ) + this.sep = opts.sep as String + } + + void apply() { + final events = new HashMap(2) + events.onNext = this.&onNext + events.onComplete = this.&onComplete + DataflowHelper.subscribeImpl(source, events) + } + + protected void onNext(value) { + final normalized = mapper != null ? 
mapper.call(value) : value + log.trace "Normalized record for index file: ${normalized}" + records << normalized + } + + protected void onComplete(nope) { + log.trace "Saving records to index file: ${records}" + new CsvWriter(header: header, sep: sep).apply(records, path) + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy index c84b9d3a9a..c41b1721d6 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy @@ -24,7 +24,9 @@ import groovy.util.logging.Slf4j import groovyx.gpars.dataflow.DataflowWriteChannel import nextflow.exception.ScriptRuntimeException import nextflow.extension.CH +import nextflow.extension.MixOp import nextflow.extension.PublishOp +import nextflow.extension.PublishIndexOp /** * Models the workflow publish definition * @@ -61,7 +63,7 @@ class PublishDef { @CompileStatic class PublishDsl { - private Map targetConfigs = [:] + private Map publishConfigs = [:] private Path directory @@ -116,7 +118,7 @@ class PublishDsl { } void target(String name, Closure closure) { - if( targetConfigs.containsKey(name) ) + if( publishConfigs.containsKey(name) ) throw new ScriptRuntimeException("Target '${name}' is defined more than once in the workflow publish definition") final dsl = new TargetDsl() @@ -125,16 +127,34 @@ class PublishDsl { cl.setDelegate(dsl) cl.call() - targetConfigs[name] = dsl.getOptions() + publishConfigs[name] = dsl.getOptions() } void build(Map targets) { - for( final entry : targets ) { - final source = entry.key - final name = entry.value - final opts = publishOptions(name, targetConfigs[name] ?: [:]) + // construct mapping of target name -> source channels + final Map> publishSources = [:] + for( final source : targets.keySet() ) { + final name = targets[source] + if( name !in publishSources ) + publishSources[name] = [] + publishSources[name] << source + } - new PublishOp(CH.getReadChannel(source), opts).apply() + // create publish op (and optional index op) for each target + for( final name : publishSources.keySet() ) { + final sources = publishSources[name] + final mixed = sources.size() > 1 + ? 
new MixOp(sources.collect( ch -> CH.getReadChannel(ch) )).apply() + : sources.first() + final opts = publishOptions(name, publishConfigs[name] ?: [:]) + + new PublishOp(CH.getReadChannel(mixed), opts).apply() + + if( opts.index ) { + final indexPath = (opts.path as Path).resolve(opts.index as String) + final indexOpts = opts.indexOpts as Map + new PublishIndexOp(CH.getReadChannel(mixed), indexPath, indexOpts).apply() + } } } @@ -173,6 +193,22 @@ class PublishDsl { setOption('ignoreErrors', value) } + void index(String path, Closure closure=null) { + setOption('index', path) + + if( closure != null ) { + final dsl = new IndexDsl() + final cl = (Closure)closure.clone() + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.setDelegate(dsl) + cl.call() + opts.indexOpts = dsl.getOptions() + } + else { + opts.indexOpts = Map.of() + } + } + void mode(String value) { setOption('mode', value) } @@ -209,4 +245,36 @@ class PublishDsl { } + static class IndexDsl { + + private Map opts = [:] + + void header(boolean value) { + setOption('header', value) + } + + void header(List value) { + setOption('header', value) + } + + void mapper(Closure value) { + setOption('mapper', value) + } + + void sep(String value) { + setOption('sep', value) + } + + private void setOption(String name, Object value) { + if( opts.containsKey(name) ) + throw new ScriptRuntimeException("Index option `${name}` cannot be defined more than once for a given index definition") + opts[name] = value + } + + Map getOptions() { + opts + } + + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy b/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy new file mode 100644 index 0000000000..c69a2db162 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy @@ -0,0 +1,68 @@ +/* + * Copyright 2024, Ben Sherman + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.util + +import java.nio.file.Path + +import groovy.transform.CompileStatic + +@CompileStatic +class CsvWriter { + + private /* boolean | List */ header = false + + private String sep = ',' + + CsvWriter(Map opts) { + if( opts.header ) + this.header = opts.header + + if( opts.sep ) + this.sep = opts.sep.toString() + } + + void apply(List records, Path path) { + Collection columns + if( header == true ) { + final first = records.first() + if( first !instanceof Map ) + throw new IllegalArgumentException('Records must be map objects when header=true') + columns = ((Map)first).keySet() + } + else if( header instanceof List ) { + columns = header + } + + if( columns ) + path << columns.collect(it -> '"' + it + '"').join(sep) << '\n' + + for( final record : records ) { + Collection values + if( record instanceof List ) + values = record + else if( record instanceof Map ) + values = columns + ? 
record.subMap(columns).values() + : record.values() + else + throw new IllegalArgumentException('Records must be list or map objects') + + path << values.collect(it -> '"' + it + '"').join(sep) << '\n' + } + } + +} diff --git a/modules/nf-commons/src/main/nextflow/util/HashBuilder.java b/modules/nf-commons/src/main/nextflow/util/HashBuilder.java index 235e211f40..838aed0cdb 100644 --- a/modules/nf-commons/src/main/nextflow/util/HashBuilder.java +++ b/modules/nf-commons/src/main/nextflow/util/HashBuilder.java @@ -75,7 +75,7 @@ public class HashBuilder { } public static Hasher defaultHasher() { - return HashBuilder.DEFAULT_HASHING.newHasher(); + return DEFAULT_HASHING.newHasher(); } private Hasher hasher = defaultHasher(); diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index d02e1c93c8..be9a6ceec5 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -78,6 +78,11 @@ publish { 'data' { mode 'link' + index('index.csv') { + mapper { val -> [filename: val.name] } + header true + sep ',' + } } 'more/data' { From e60403d974f99a8972d99b39d9922e6539f6335c Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 18 Apr 2024 16:28:30 -0500 Subject: [PATCH 34/47] Fix failing tests Signed-off-by: Ben Sherman --- .../nextflow/processor/PublishDir.groovy | 2 +- .../groovy/nextflow/script/PublishDef.groovy | 2 + .../nextflow/extension/PublishOpTest.groovy | 8 +- .../nextflow/processor/PublishDirTest.groovy | 24 ++-- .../test/nextflow/util/CacheHelperTest.groovy | 98 ------------- .../test/nextflow/util/HashBuilderTest.groovy | 130 ++++++++++++++++++ 6 files changed, 147 insertions(+), 117 deletions(-) create mode 100644 modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index 0892a5b3a7..68b4173283 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -79,7 +79,7 @@ class PublishDir { /** * Whether to overwrite existing files */ - def /* Boolean | String */ overwrite = 'standard' + def /* Boolean | String */ overwrite /** * The publish {@link Mode} diff --git a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy index c41b1721d6..46e9e42067 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy @@ -165,6 +165,8 @@ class PublishDsl { final opts = defaults + overrides if( opts.containsKey('ignoreErrors') ) opts.failOnError = !opts.remove('ignoreErrors') + if( !opts.containsKey('overwrite') ) + opts.overwrite = 'standard' final path = opts.path as String ?: name if( path.startsWith('/') ) diff --git a/modules/nextflow/src/test/groovy/nextflow/extension/PublishOpTest.groovy b/modules/nextflow/src/test/groovy/nextflow/extension/PublishOpTest.groovy index 4923606887..e8530c9e3b 100644 --- a/modules/nextflow/src/test/groovy/nextflow/extension/PublishOpTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/extension/PublishOpTest.groovy @@ -54,8 +54,12 @@ class PublishOpTest extends BaseSpec { when: def now = System.currentTimeMillis() - def op = new PublishOp(ch, [to:target, mode:'symlink']) .apply() - while( !op.complete ) { sleep 100; if( System.currentTimeMillis()-now>5_000) throw new TimeoutException() } + def op = new PublishOp(ch, 
[path:target, mode:'symlink']).apply() + while( !op.complete ) { + sleep 100 + if( System.currentTimeMillis() - now > 5_000 ) + throw new TimeoutException() + } then: target.resolve('file1.txt').text == 'Hello' target.resolve('file2.txt').text == 'world' diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/PublishDirTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/PublishDirTest.groovy index 74ba70740f..60f183fb23 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/PublishDirTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/PublishDirTest.groovy @@ -44,17 +44,17 @@ class PublishDirTest extends Specification { publish.path == Paths.get('/data') when: - publish = PublishDir.create(path: 'data') + publish = PublishDir.create(path: 'data') then: publish.path == Paths.get('data').complete() when: - publish = PublishDir.create( path: Paths.get('data') ) + publish = PublishDir.create( path: Paths.get('data') ) then: publish.path == Paths.get('data').complete() when: - publish = PublishDir.create( [path: '/some/dir', overwrite: true, pattern: '*.bam', mode: 'link'] ) + publish = PublishDir.create( [path: '/some/dir', overwrite: true, pattern: '*.bam', mode: 'link'] ) then: publish.path == Paths.get('/some/dir') publish.mode == PublishDir.Mode.LINK @@ -63,7 +63,7 @@ class PublishDirTest extends Specification { publish.enabled when: - publish = PublishDir.create( [path: '/some/data', mode: 'copy', enabled: false] ) + publish = PublishDir.create( [path: '/some/data', mode: 'copy', enabled: false] ) then: publish.path == Paths.get('/some/data') publish.mode == PublishDir.Mode.COPY @@ -72,7 +72,7 @@ class PublishDirTest extends Specification { !publish.enabled when: - publish = PublishDir.create( [path: '/some/data', mode: 'copy', enabled: 'false'] ) + publish = PublishDir.create( [path: '/some/data', mode: 'copy', enabled: 'false'] ) then: publish.path == Paths.get('/some/data') publish.mode == PublishDir.Mode.COPY @@ -81,15 +81,7 @@ class PublishDirTest extends Specification { !publish.enabled when: - publish = PublishDir.create( [path:'this/folder', overwrite: false, pattern: '*.txt', mode: 'copy'] ) - then: - publish.path == Paths.get('this/folder').complete() - publish.mode == PublishDir.Mode.COPY - publish.pattern == '*.txt' - publish.overwrite == false - - when: - publish = PublishDir.create( [path:'this/folder', overwrite: 'false', pattern: '*.txt', mode: 'copy'] ) + publish = PublishDir.create( [path:'this/folder', overwrite: false, pattern: '*.txt', mode: 'copy'] ) then: publish.path == Paths.get('this/folder').complete() publish.mode == PublishDir.Mode.COPY @@ -132,7 +124,7 @@ class PublishDirTest extends Specification { def task = new TaskRun(workDir: workDir, config: new TaskConfig(), name: 'foo') when: - def outputs = [ + def outputs = [ workDir.resolve('file1.txt'), workDir.resolve('file2.bam'), workDir.resolve('file3.fastq') @@ -326,7 +318,7 @@ class PublishDirTest extends Specification { def task = new TaskRun(workDir: workDir, config: Mock(TaskConfig)) when: - def outputs = [ + def outputs = [ workDir.resolve('file1.txt'), ] as Set def publisher = new PublishDir(path: publishDir, enabled: false) diff --git a/modules/nf-commons/src/test/nextflow/util/CacheHelperTest.groovy b/modules/nf-commons/src/test/nextflow/util/CacheHelperTest.groovy index a599297c20..9ccaef47b4 100644 --- a/modules/nf-commons/src/test/nextflow/util/CacheHelperTest.groovy +++ b/modules/nf-commons/src/test/nextflow/util/CacheHelperTest.groovy @@ -17,7 
+17,6 @@ package nextflow.util import java.nio.file.Files -import java.nio.file.Paths import java.nio.file.attribute.FileTime import com.google.common.hash.Hashing @@ -85,45 +84,6 @@ class CacheHelperTest extends Specification { } - def testHashContent() { - setup: - def path1 = Files.createTempFile('test-hash-content',null) - def path2 = Files.createTempFile('test-hash-content',null) - def path3 = Files.createTempFile('test-hash-content',null) - - path1.text = ''' - line 1 - line 2 - line 3 the file content - ''' - - - path2.text = ''' - line 1 - line 2 - line 3 the file content - ''' - - path3.text = ''' - line 1 - line 1 - line 1 the file content - ''' - - expect: - CacheHelper.hashContent(path1) == CacheHelper.hashContent(path2) - CacheHelper.hashContent(path1) != CacheHelper.hashContent(path3) - CacheHelper.hashContent(path1, Hashing.md5()) == CacheHelper.hashContent(path2,Hashing.md5()) - CacheHelper.hashContent(path1, Hashing.md5()) != CacheHelper.hashContent(path3,Hashing.md5()) - - cleanup: - path1.delete() - path2.delete() - path3.delete() - - } - - def testHashOrder () { when: @@ -236,37 +196,6 @@ class CacheHelperTest extends Specification { 'lenient' | CacheHelper.HashMode.LENIENT 'sha256' | CacheHelper.HashMode.SHA256 } - - def 'should validate is asset file'() { - when: - def BASE = Paths.get("/some/pipeline/dir") - and: - Global.session = Mock(Session) { getBaseDir() >> BASE } - then: - !CacheHelper.isAssetFile(BASE.resolve('foo')) - - - when: - Global.session = Mock(Session) { - getBaseDir() >> BASE - getCommitId() >> '123456' - } - then: - CacheHelper.isAssetFile(BASE.resolve('foo')) - and: - !CacheHelper.isAssetFile(Paths.get('/other/dir')) - } - - - def 'should hash file content'() { - given: - def EXPECTED = '64ec88ca00b268e5ba1a35678a1b5316d212f4f366b2477232534a8aeca37f3c' - def file = TestHelper.createInMemTempFile('foo', 'Hello world') - expect: - CacheHelper.hashFileSha256Impl0(file) == EXPECTED - and: - CacheHelper.hashFileSha256Impl0(file) == DigestUtils.sha256Hex(file.bytes) - } def 'should hash content with sha256' () { given: @@ -283,31 +212,4 @@ class CacheHelperTest extends Specification { CacheHelper.hasher(file, CacheHelper.HashMode.SHA256).hash().toString() == 'd29e7ba0fbcc617ab8e1e44e81381aed' } - def 'should hash dir content with sha256'() { - given: - def folder = TestHelper.createInMemTempDir() - folder.resolve('dir1').mkdir() - folder.resolve('dir2').mkdir() - and: - folder.resolve('dir1/foo').text = "I'm foo" - folder.resolve('dir1/bar').text = "I'm bar" - folder.resolve('dir1/xxx/yyy').mkdirs() - folder.resolve('dir1/xxx/foo1').text = "I'm foo within xxx" - folder.resolve('dir1/xxx/yyy/bar1').text = "I'm bar within yyy" - and: - folder.resolve('dir2/foo').text = "I'm foo" - folder.resolve('dir2/bar').text = "I'm bar" - folder.resolve('dir2/xxx/yyy').mkdirs() - folder.resolve('dir2/xxx/foo1').text = "I'm foo within xxx" - folder.resolve('dir2/xxx/yyy/bar1').text = "I'm bar within yyy" - - when: - def hash1 = CacheHelper.hashDirSha256(CacheHelper.defaultHasher(), folder.resolve('dir1'), folder.resolve('dir1')) - and: - def hash2 = CacheHelper.hashDirSha256(CacheHelper.defaultHasher(), folder.resolve('dir2'), folder.resolve('dir2')) - - then: - hash1.hash() == hash2.hash() - - } } diff --git a/modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy b/modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy new file mode 100644 index 0000000000..79c380ae94 --- /dev/null +++ 
b/modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy @@ -0,0 +1,130 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.util + +import java.nio.file.Files +import java.nio.file.Paths + +import com.google.common.hash.Hashing +import nextflow.Global +import nextflow.Session +import org.apache.commons.codec.digest.DigestUtils +import spock.lang.Specification +import test.TestHelper +/** + * + * @author Paolo Di Tommaso + */ +class HashBuilderTest extends Specification { + + + def testHashContent() { + setup: + def path1 = Files.createTempFile('test-hash-content',null) + def path2 = Files.createTempFile('test-hash-content',null) + def path3 = Files.createTempFile('test-hash-content',null) + + path1.text = ''' + line 1 + line 2 + line 3 the file content + ''' + + + path2.text = ''' + line 1 + line 2 + line 3 the file content + ''' + + path3.text = ''' + line 1 + line 1 + line 1 the file content + ''' + + expect: + HashBuilder.hashContent(path1) == HashBuilder.hashContent(path2) + HashBuilder.hashContent(path1) != HashBuilder.hashContent(path3) + HashBuilder.hashContent(path1, Hashing.md5()) == HashBuilder.hashContent(path2,Hashing.md5()) + HashBuilder.hashContent(path1, Hashing.md5()) != HashBuilder.hashContent(path3,Hashing.md5()) + + cleanup: + path1.delete() + path2.delete() + path3.delete() + + } + + def 'should validate is asset file'() { + when: + def BASE = Paths.get("/some/pipeline/dir") + and: + Global.session = Mock(Session) { getBaseDir() >> BASE } + then: + !HashBuilder.isAssetFile(BASE.resolve('foo')) + + + when: + Global.session = Mock(Session) { + getBaseDir() >> BASE + getCommitId() >> '123456' + } + then: + HashBuilder.isAssetFile(BASE.resolve('foo')) + and: + !HashBuilder.isAssetFile(Paths.get('/other/dir')) + } + + def 'should hash file content'() { + given: + def EXPECTED = '64ec88ca00b268e5ba1a35678a1b5316d212f4f366b2477232534a8aeca37f3c' + def file = TestHelper.createInMemTempFile('foo', 'Hello world') + expect: + HashBuilder.hashFileSha256Impl0(file) == EXPECTED + and: + HashBuilder.hashFileSha256Impl0(file) == DigestUtils.sha256Hex(file.bytes) + } + + def 'should hash dir content with sha256'() { + given: + def folder = TestHelper.createInMemTempDir() + folder.resolve('dir1').mkdir() + folder.resolve('dir2').mkdir() + and: + folder.resolve('dir1/foo').text = "I'm foo" + folder.resolve('dir1/bar').text = "I'm bar" + folder.resolve('dir1/xxx/yyy').mkdirs() + folder.resolve('dir1/xxx/foo1').text = "I'm foo within xxx" + folder.resolve('dir1/xxx/yyy/bar1').text = "I'm bar within yyy" + and: + folder.resolve('dir2/foo').text = "I'm foo" + folder.resolve('dir2/bar').text = "I'm bar" + folder.resolve('dir2/xxx/yyy').mkdirs() + folder.resolve('dir2/xxx/foo1').text = "I'm foo within xxx" + folder.resolve('dir2/xxx/yyy/bar1').text = "I'm bar within yyy" + + when: + def hash1 = HashBuilder.hashDirSha256(HashBuilder.defaultHasher(), folder.resolve('dir1'), folder.resolve('dir1')) + and: + def 
hash2 = HashBuilder.hashDirSha256(HashBuilder.defaultHasher(), folder.resolve('dir2'), folder.resolve('dir2')) + + then: + hash1.hash() == hash2.hash() + + } +} From 12c27208c6edc4aafbecd7c42d669284c5cd84f9 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 22 Apr 2024 16:45:29 -0500 Subject: [PATCH 35/47] Apply suggestions from review Signed-off-by: Ben Sherman --- docs/workflow.md | 12 +++- .../nextflow/extension/PublishIndexOp.groovy | 71 +++++++++++++++++-- .../groovy/nextflow/script/PublishDef.groovy | 34 ++++----- .../groovy/nextflow/util/CsvWriter.groovy | 2 + tests/publish-dsl.nf | 5 +- 5 files changed, 98 insertions(+), 26 deletions(-) diff --git a/docs/workflow.md b/docs/workflow.md index 878e3006fb..ebeb093acc 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -634,7 +634,9 @@ publish { directory 'results' 'foo/' { - index 'index.csv' + index { + path 'index.csv' + } } } ``` @@ -651,7 +653,8 @@ The above example will write the following CSV file to `results/foo/index.csv`: You can customize the index file by specifying options in a block, for example: ```groovy -index('index.csv') { +index { + path 'index.csv' header ['name', 'extra_option'] sep '\t' mapper { val -> val + [extra_option: 'bar'] } @@ -661,10 +664,13 @@ index('index.csv') { The following options are available: `header` -: When `true`, the keys of the first record are used as the column names (default: `true`). Can also be a list of column names. +: When `true`, the keys of the first record are used as the column names (default: `false`). Can also be a list of column names. `mapper` : Closure which defines how to transform each published value into a CSV record. The closure should return a list or map. By default, no transformation is applied. +`path` +: The name of the index file relative to the target path (required). + `sep` : The character used to separate values (default: `','`). diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy index ccac33162c..f56f94588a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy @@ -21,6 +21,8 @@ import java.nio.file.Path import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import groovyx.gpars.dataflow.DataflowReadChannel +import nextflow.Global +import nextflow.Session import nextflow.util.CsvWriter /** * @@ -32,19 +34,24 @@ class PublishIndexOp { private DataflowReadChannel source + private Path basePath + private Path path private Closure mapper - private /* boolean | List */ header = true + private /* boolean | List */ header = false private String sep = ',' private List records = [] - PublishIndexOp(DataflowReadChannel source, Path path, Map opts) { + private Session getSession() { Global.session as Session } + + PublishIndexOp(DataflowReadChannel source, Path basePath, String indexPath, Map opts) { this.source = source - this.path = path + this.basePath = basePath + this.path = basePath.resolve(indexPath) if( opts.mapper ) this.mapper = opts.mapper as Closure if( opts.header != null ) @@ -61,7 +68,8 @@ class PublishIndexOp { } protected void onNext(value) { - final normalized = mapper != null ? mapper.call(value) : value + final record = mapper != null ? 
mapper.call(value) : value + final normalized = normalizePaths(record) log.trace "Normalized record for index file: ${normalized}" records << normalized } @@ -71,4 +79,59 @@ class PublishIndexOp { new CsvWriter(header: header, sep: sep).apply(records, path) } + protected Object normalizePaths(value) { + if( value instanceof Collection ) { + return value.collect { el -> + if( el instanceof Path ) + return normalizePath(el) + if( el instanceof Collection ) + return normalizePaths(el) + return el + } + } + + if( value instanceof Map ) { + return value.collectEntries { k, v -> + if( v instanceof Path ) + return List.of(k, normalizePath(v)) + if( v instanceof Collection ) + return List.of(k, normalizePaths(v)) + return List.of(k, v) + } + } + + throw new IllegalArgumentException("Index file record must be a list or map: ${value} [${value.class.simpleName}]") + } + + private Path normalizePath(Path path) { + final sourceDir = getTaskDir(path) + return basePath.resolve(sourceDir.relativize(path)) + } + + /** + * Given a path try to infer the task directory to which the path below + * ie. the directory starting with a workflow work dir and having at lest + * two sub-directories eg work-dir/xx/yyyyyy/etc + * + * @param path + */ + protected Path getTaskDir(Path path) { + if( path == null ) + return null + return getTaskDir0(path, session.workDir.resolve('tmp')) + ?: getTaskDir0(path, session.workDir) + ?: getTaskDir0(path, session.bucketDir) + } + + private Path getTaskDir0(Path file, Path base) { + if( base == null ) + return null + if( base.fileSystem != file.fileSystem ) + return null + final len = base.nameCount + if( file.startsWith(base) && file.getNameCount() > len+2 ) + return base.resolve(file.subpath(len,len+2)) + return null + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy index 46e9e42067..694f109535 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy @@ -151,9 +151,12 @@ class PublishDsl { new PublishOp(CH.getReadChannel(mixed), opts).apply() if( opts.index ) { - final indexPath = (opts.path as Path).resolve(opts.index as String) - final indexOpts = opts.indexOpts as Map - new PublishIndexOp(CH.getReadChannel(mixed), indexPath, indexOpts).apply() + final basePath = opts.path as Path + final indexOpts = opts.index as Map + final indexPath = indexOpts.path as String + if( !indexPath ) + throw new ScriptRuntimeException("Index file definition for publish target '${name}' is missing `path` option") + new PublishIndexOp(CH.getReadChannel(mixed), basePath, indexPath, indexOpts).apply() } } } @@ -195,20 +198,13 @@ class PublishDsl { setOption('ignoreErrors', value) } - void index(String path, Closure closure=null) { - setOption('index', path) - - if( closure != null ) { - final dsl = new IndexDsl() - final cl = (Closure)closure.clone() - cl.setResolveStrategy(Closure.DELEGATE_FIRST) - cl.setDelegate(dsl) - cl.call() - opts.indexOpts = dsl.getOptions() - } - else { - opts.indexOpts = Map.of() - } + void index(Closure closure) { + final dsl = new IndexDsl() + final cl = (Closure)closure.clone() + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.setDelegate(dsl) + cl.call() + setOption('index', dsl.getOptions()) } void mode(String value) { @@ -263,6 +259,10 @@ class PublishDsl { setOption('mapper', value) } + void path(String value) { + setOption('path', value) + } + void sep(String 
value) { setOption('sep', value) } diff --git a/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy b/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy index c69a2db162..b2d4736bbc 100644 --- a/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/util/CsvWriter.groovy @@ -47,6 +47,8 @@ class CsvWriter { columns = header } + path.delete() + if( columns ) path << columns.collect(it -> '"' + it + '"').join(sep) << '\n' diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index be9a6ceec5..b78e625f34 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -78,8 +78,9 @@ publish { 'data' { mode 'link' - index('index.csv') { - mapper { val -> [filename: val.name] } + index { + path 'index.csv' + mapper { val -> [filename: val] } header true sep ',' } From d99887876cf878378cd63529bd0788d216c6687c Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 24 Apr 2024 02:23:58 -0500 Subject: [PATCH 36/47] Don't write index file if no records were published Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/extension/PublishIndexOp.groovy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy index f56f94588a..50b327c453 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy @@ -75,6 +75,8 @@ class PublishIndexOp { } protected void onComplete(nope) { + if( records.size() == 0 ) + return log.trace "Saving records to index file: ${records}" new CsvWriter(header: header, sep: sep).apply(records, path) } From 95a110ac629031e2ce9595d120816def151de824 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 24 Apr 2024 02:54:47 -0500 Subject: [PATCH 37/47] Redirect to `null` to disable publishing Signed-off-by: Ben Sherman --- docs/workflow.md | 23 ++++++---- .../nextflow/ast/NextflowDSLImpl.groovy | 46 +++++++++++++++---- .../groovy/nextflow/script/PublishDef.groovy | 10 +--- tests/publish-dsl.nf | 4 +- 4 files changed, 56 insertions(+), 27 deletions(-) diff --git a/docs/workflow.md b/docs/workflow.md index ebeb093acc..a26c59a5a5 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -524,7 +524,18 @@ The trailing slash in the target name is not required; it is only used to denote The target name must not begin with a slash (`/`), it should be a relative path name. ::: -Publish targets can also be customized in the publish definition using a set of options similar to the {ref}`process-publishdir` directive. +Workflows can also disable publishing for specific channels by redirecting them to `null`: + +```groovy +workflow { + ch_foo = foo() + + publish: + ch_foo >> params.save_foo ? 'foo/' : null +} +``` + +Publish targets can be customized in the publish definition using a set of options similar to the {ref}`process-publishdir` directive. For example: @@ -534,17 +545,12 @@ publish { mode 'copy' 'foo/' { - enabled params.save_foo mode 'link' } } ``` -In this example, the following publish options are applied: - -- All files will be copied by default - -- Files published to `foo/` will be hard-linked, overriding the default option. Additionally, these files will be published only if `params.save_foo` is true. +In this example, all files will be copied by default, and files published to `foo/` will be hard-linked, overriding the default option. 
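+
+For instance, a workflow can route a channel to this target as shown below; with the settings above, the published files are hard-linked under `results/foo/` (a sketch, with `foo` standing in for any process):
+
+```groovy
+workflow {
+    ch_foo = foo()
+
+    publish:
+    ch_foo >> 'foo/'
+}
+```
+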
Available options: @@ -552,9 +558,6 @@ Available options: : *Currently only supported for S3.* : Specify the media type a.k.a. [MIME type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_Types) of published files (default: `false`). Can be a string (e.g. `'text/html'`), or `true` to infer the content type from the file extension. -`enabled` -: Enable or disable publishing (default: `true`). - `ignoreErrors` : When `true`, the workflow will not fail if a file can't be published for some reason (default: `false`). diff --git a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy index 7f058b859d..ea34e2cf8e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy @@ -44,6 +44,7 @@ import org.codehaus.groovy.ast.Parameter import org.codehaus.groovy.ast.VariableScope import org.codehaus.groovy.ast.expr.ArgumentListExpression import org.codehaus.groovy.ast.expr.BinaryExpression +import org.codehaus.groovy.ast.expr.BooleanExpression import org.codehaus.groovy.ast.expr.CastExpression import org.codehaus.groovy.ast.expr.ClosureExpression import org.codehaus.groovy.ast.expr.ConstantExpression @@ -54,6 +55,7 @@ import org.codehaus.groovy.ast.expr.MapEntryExpression import org.codehaus.groovy.ast.expr.MapExpression import org.codehaus.groovy.ast.expr.MethodCallExpression import org.codehaus.groovy.ast.expr.PropertyExpression +import org.codehaus.groovy.ast.expr.TernaryExpression import org.codehaus.groovy.ast.expr.TupleExpression import org.codehaus.groovy.ast.expr.UnaryMinusExpression import org.codehaus.groovy.ast.expr.VariableExpression @@ -431,18 +433,46 @@ class NextflowDSLImpl implements ASTTransformation { } protected Statement normWorkflowPublish(ExpressionStatement stm) { - if( stm.expression !instanceof BinaryExpression ) { - syntaxError(stm, "Invalid workflow publish statement") - return stm + // HACK: fix ternary expression for publish target + // right shift takes precedence over ternary in Groovy grammar + // custom parser will handle this more elegantly + if( stm.expression instanceof TernaryExpression ) { + final ternaryX = (TernaryExpression)stm.expression + if( ternaryX.booleanExpression.expression !instanceof BinaryExpression ) { + syntaxError(stm, "Invalid workflow publish statement") + return stm + } + + final binaryX = (BinaryExpression)ternaryX.booleanExpression.expression + if( binaryX.operation.type != Types.RIGHT_SHIFT ) { + syntaxError(stm, "Invalid workflow publish statement") + return stm + } + + // transform: + // (ch_foo >> params.save_foo) ? 'foo' : null + // to: + // ch_foo >> (params.save_foo ? 
'foo' : null) + final target = new TernaryExpression( + new BooleanExpression(binaryX.rightExpression), + ternaryX.trueExpression, + ternaryX.falseExpression + ) + return stmt( callThisX('_publish_target', args(binaryX.leftExpression, target)) ) } - final binaryX = (BinaryExpression)stm.expression - if( binaryX.operation.type != Types.RIGHT_SHIFT ) { - syntaxError(stm, "Invalid workflow publish statement") - return stm + if( stm.expression instanceof BinaryExpression ) { + final binaryX = (BinaryExpression)stm.expression + if( binaryX.operation.type != Types.RIGHT_SHIFT ) { + syntaxError(stm, "Invalid workflow publish statement") + return stm + } + + return stmt( callThisX('_publish_target', args(binaryX.leftExpression, binaryX.rightExpression)) ) } - return stmt( callThisX('_publish_target', args(binaryX.leftExpression, binaryX.rightExpression)) ) + syntaxError(stm, "Invalid workflow publish statement") + return stm } protected Expression makeWorkflowDefWrapper( ClosureExpression closure, boolean anonymous ) { diff --git a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy index 694f109535..b79a3ca2df 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy @@ -83,10 +83,6 @@ class PublishDsl { setDefault('contentType', value) } - void enabled(boolean value) { - setDefault('enabled', value) - } - void ignoreErrors(boolean value) { setDefault('ignoreErrors', value) } @@ -135,6 +131,8 @@ class PublishDsl { final Map> publishSources = [:] for( final source : targets.keySet() ) { final name = targets[source] + if( !name ) + continue if( name !in publishSources ) publishSources[name] = [] publishSources[name] << source @@ -190,10 +188,6 @@ class PublishDsl { setOption('contentType', value) } - void enabled(boolean value) { - setOption('enabled', value) - } - void ignoreErrors(boolean value) { setOption('ignoreErrors', value) } diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index b78e625f34..bd301d3c72 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -16,6 +16,8 @@ */ nextflow.preview.publish = true +params.save_foo = true + process align { input: val(x) @@ -70,7 +72,7 @@ workflow { publish: align.out >> 'data' my_combine.out >> 'more/data' - foo.out >> 'data' + foo.out >> params.save_foo ? 'data' : null } publish { From 7953e79726645f188fecc535e2abad3db12df4f0 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 24 Apr 2024 03:23:11 -0500 Subject: [PATCH 38/47] Remove ternary hack, require parentheses instead Signed-off-by: Ben Sherman --- docs/workflow.md | 2 +- .../nextflow/ast/NextflowDSLImpl.groovy | 46 ++++--------------- tests/publish-dsl.nf | 2 +- 3 files changed, 10 insertions(+), 40 deletions(-) diff --git a/docs/workflow.md b/docs/workflow.md index a26c59a5a5..279e0d3829 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -531,7 +531,7 @@ workflow { ch_foo = foo() publish: - ch_foo >> params.save_foo ? 'foo/' : null + ch_foo >> (params.save_foo ? 
'foo/' : null) } ``` diff --git a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy index ea34e2cf8e..7f058b859d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy @@ -44,7 +44,6 @@ import org.codehaus.groovy.ast.Parameter import org.codehaus.groovy.ast.VariableScope import org.codehaus.groovy.ast.expr.ArgumentListExpression import org.codehaus.groovy.ast.expr.BinaryExpression -import org.codehaus.groovy.ast.expr.BooleanExpression import org.codehaus.groovy.ast.expr.CastExpression import org.codehaus.groovy.ast.expr.ClosureExpression import org.codehaus.groovy.ast.expr.ConstantExpression @@ -55,7 +54,6 @@ import org.codehaus.groovy.ast.expr.MapEntryExpression import org.codehaus.groovy.ast.expr.MapExpression import org.codehaus.groovy.ast.expr.MethodCallExpression import org.codehaus.groovy.ast.expr.PropertyExpression -import org.codehaus.groovy.ast.expr.TernaryExpression import org.codehaus.groovy.ast.expr.TupleExpression import org.codehaus.groovy.ast.expr.UnaryMinusExpression import org.codehaus.groovy.ast.expr.VariableExpression @@ -433,46 +431,18 @@ class NextflowDSLImpl implements ASTTransformation { } protected Statement normWorkflowPublish(ExpressionStatement stm) { - // HACK: fix ternary expression for publish target - // right shift takes precedence over ternary in Groovy grammar - // custom parser will handle this more elegantly - if( stm.expression instanceof TernaryExpression ) { - final ternaryX = (TernaryExpression)stm.expression - if( ternaryX.booleanExpression.expression !instanceof BinaryExpression ) { - syntaxError(stm, "Invalid workflow publish statement") - return stm - } - - final binaryX = (BinaryExpression)ternaryX.booleanExpression.expression - if( binaryX.operation.type != Types.RIGHT_SHIFT ) { - syntaxError(stm, "Invalid workflow publish statement") - return stm - } - - // transform: - // (ch_foo >> params.save_foo) ? 'foo' : null - // to: - // ch_foo >> (params.save_foo ? 'foo' : null) - final target = new TernaryExpression( - new BooleanExpression(binaryX.rightExpression), - ternaryX.trueExpression, - ternaryX.falseExpression - ) - return stmt( callThisX('_publish_target', args(binaryX.leftExpression, target)) ) + if( stm.expression !instanceof BinaryExpression ) { + syntaxError(stm, "Invalid workflow publish statement") + return stm } - if( stm.expression instanceof BinaryExpression ) { - final binaryX = (BinaryExpression)stm.expression - if( binaryX.operation.type != Types.RIGHT_SHIFT ) { - syntaxError(stm, "Invalid workflow publish statement") - return stm - } - - return stmt( callThisX('_publish_target', args(binaryX.leftExpression, binaryX.rightExpression)) ) + final binaryX = (BinaryExpression)stm.expression + if( binaryX.operation.type != Types.RIGHT_SHIFT ) { + syntaxError(stm, "Invalid workflow publish statement") + return stm } - syntaxError(stm, "Invalid workflow publish statement") - return stm + return stmt( callThisX('_publish_target', args(binaryX.leftExpression, binaryX.rightExpression)) ) } protected Expression makeWorkflowDefWrapper( ClosureExpression closure, boolean anonymous ) { diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index bd301d3c72..38e92273e8 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -72,7 +72,7 @@ workflow { publish: align.out >> 'data' my_combine.out >> 'more/data' - foo.out >> params.save_foo ? 
'data' : null + foo.out >> (params.save_foo ? 'data' : null) } publish { From 856209bcd8b0e45d5dd92c8cac68d7b84ca150e2 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 29 Apr 2024 17:56:00 -0500 Subject: [PATCH 39/47] Replace publish path option with ability to reroute targets in publish section Signed-off-by: Ben Sherman --- docs/workflow.md | 9 +++++---- .../src/main/groovy/nextflow/script/PublishDef.groovy | 11 +++-------- .../groovy/nextflow/script/WorkflowBinding.groovy | 7 +++++++ tests/publish-dsl.nf | 11 +++++++++-- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/docs/workflow.md b/docs/workflow.md index 279e0d3829..2ab705bf86 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -453,7 +453,11 @@ In the above example, the output `results` of process `foo` is published to the In a process, any output with an `emit` name can be published. In a workflow, any channel defined in the workflow, including process and subworkflow outputs, can be published. :::{note} -If a process/workflow output (e.g. `foo.out`) contains multiple channels, each channel will be published. Individual output channels can also be published by index or name (e.g. `foo.out[0]` or `foo.out.results`). +If the publish source is a process/workflow output (e.g. `foo.out`) with multiple channels, each channel will be published. Individual output channels can also be published by index or name (e.g. `foo.out[0]` or `foo.out.results`). +::: + +:::{note} +The publish source can also be a target name, in which case all channels published to the old target will be re-mapped to the new target. This is a useful way to override publish directories in calling workflows. ::: As shown in the example, workflows can override the publish targets of process and subworkflow outputs. This way, each process and workflow can define some sensible defaults for publishing, which can be overridden by calling workflows as needed. @@ -601,9 +605,6 @@ Available options: `'standard'` : Overwrite existing files when the file size or last modified timestamp is different. -`path` -: Specify the publish path relative to the output directory (default: the target name). Can only be specified within a target definition. - `storageClass` : *Currently only supported for S3.* : Specify the storage class for published files. 
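To make the target re-routing described in the note above concrete, here is a minimal sketch mirroring this patch's own change to `tests/publish-dsl.nf` (the `align_combine_foo` workflow and the `'more/data'` / `'more'` target names come from that test; the process calls are omitted):

```groovy
workflow align_combine_foo {
    // ... process calls omitted; my_combine publishes to 'more/data' ...

    publish:
    my_combine.out >> 'more/data'
}

workflow {
    align_combine_foo()

    publish:
    // re-map everything published to 'more/data' so that it lands under 'more/'
    'more/data' >> 'more'
}
```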
diff --git a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy index b79a3ca2df..7dc2804b96 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy @@ -159,9 +159,11 @@ class PublishDsl { } } - private Map publishOptions(String name, Map overrides) { + private Map publishOptions(String path, Map overrides) { if( !directory ) directory = Paths.get('.').complete() + if( path.startsWith('/') ) + throw new ScriptRuntimeException("Invalid publish target '${path}' -- it should be a relative path") final opts = defaults + overrides if( opts.containsKey('ignoreErrors') ) @@ -169,9 +171,6 @@ class PublishDsl { if( !opts.containsKey('overwrite') ) opts.overwrite = 'standard' - final path = opts.path as String ?: name - if( path.startsWith('/') ) - throw new ScriptRuntimeException("Invalid publish target path '${path}' -- it should be a relative path") opts.path = directory.resolve(path) return opts } @@ -213,10 +212,6 @@ class PublishDsl { setOption('overwrite', value) } - void path(String value) { - setOption('path', value) - } - void storageClass(String value) { setOption('storageClass', value) } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy index 105d67a246..7183b5f2e2 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy @@ -165,4 +165,11 @@ class WorkflowBinding extends Binding { _publish_target(ch, name) } + void _publish_target(String oldTarget, String newTarget) { + final targets = owner.session.publishTargets + for( final source : targets.keySet() ) + if( targets[source] == oldTarget ) + targets[source] = newTarget + } + } diff --git a/tests/publish-dsl.nf b/tests/publish-dsl.nf index 38e92273e8..61f7ead3cc 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-dsl.nf @@ -58,7 +58,7 @@ process foo { ''' } -workflow { +workflow align_combine_foo { def input = Channel.of('alpha','beta','delta') align(input) @@ -75,6 +75,13 @@ workflow { foo.out >> (params.save_foo ? 
'data' : null) } +workflow { + align_combine_foo() + + publish: + 'more/data' >> 'more' +} + publish { directory 'results' @@ -88,7 +95,7 @@ publish { } } - 'more/data' { + 'more' { mode 'copy' } } From 50484d92ab26532cc2178d28613e7e1f144b02f6 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 1 May 2024 14:54:32 -0500 Subject: [PATCH 40/47] Apply suggestions from review Signed-off-by: Ben Sherman --- docs/config.md | 4 +-- docs/workflow.md | 29 +++++++++---------- .../src/main/groovy/nextflow/NF.groovy | 4 +-- .../main/groovy/nextflow/NextflowMeta.groovy | 10 +++---- .../nextflow/ast/NextflowDSLImpl.groovy | 16 +++++----- .../groovy/nextflow/script/BaseScript.groovy | 10 +++---- .../groovy/nextflow/script/PublishDef.groovy | 11 +++++-- .../nextflow/script/WorkflowBinding.groovy | 7 ----- tests/{publish-dsl.nf => publish-def.nf} | 19 +++--------- 9 files changed, 48 insertions(+), 62 deletions(-) rename tests/{publish-dsl.nf => publish-def.nf} (89%) diff --git a/docs/config.md b/docs/config.md index 003191c973..61edb1e4bf 100644 --- a/docs/config.md +++ b/docs/config.md @@ -2082,14 +2082,14 @@ Some features can be enabled using the `nextflow.enable` and `nextflow.preview` - Nextflow will fail if multiple functions and/or processes with the same name are defined in a module script -`nextflow.preview.publish` +`nextflow.preview.output` : :::{versionadded} 24.04.0 ::: : *Experimental: may change in a future release.* -: When `true`, enables the use of the {ref}`workflow publish definition `. +: When `true`, enables the use of the {ref}`workflow output definition `. `nextflow.preview.recursion` diff --git a/docs/workflow.md b/docs/workflow.md index 2ab705bf86..e9450db7fd 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -393,7 +393,7 @@ workflow { In the above snippet, the initial channel is piped to the {ref}`operator-map` operator, which reverses the string value. Then, the result is passed to the processes `foo` and `bar`, which are executed in parallel. Each process outputs a channel, and the two channels are combined using the {ref}`operator-mix` operator. Finally, the result is printed using the {ref}`operator-view` operator. -(workflow-publish-def)= +(workflow-output-def)= ## Publishing outputs @@ -401,22 +401,22 @@ In the above snippet, the initial channel is piped to the {ref}`operator-map` op ::: :::{note} -This feature requires the `nextflow.preview.publish` feature flag to be enabled. +This feature requires the `nextflow.preview.output` feature flag to be enabled. ::: -A script may define the set of outputs that should be published by the implicit workflow, known as the workflow publish definition: +A script may define the set of outputs that should be published by the implicit workflow, known as the workflow output definition: ```groovy workflow { foo(bar()) } -publish { +output { directory 'results' } ``` -The publish definition must be defined after the implicit workflow. +The output definition must be defined after the implicit workflow. ### Publishing channels @@ -456,10 +456,6 @@ In a process, any output with an `emit` name can be published. In a workflow, an If the publish source is a process/workflow output (e.g. `foo.out`) with multiple channels, each channel will be published. Individual output channels can also be published by index or name (e.g. `foo.out[0]` or `foo.out.results`). ::: -:::{note} -The publish source can also be a target name, in which case all channels published to the old target will be re-mapped to the new target. 
This is a useful way to override publish directories in calling workflows. -::: - As shown in the example, workflows can override the publish targets of process and subworkflow outputs. This way, each process and workflow can define some sensible defaults for publishing, which can be overridden by calling workflows as needed. By default, all files emitted by the channel will be published into the specified directory. If a channel emits list values, any files in the list (including nested lists) will also be published. For example: @@ -480,7 +476,7 @@ workflow { The `directory` statement is used to set the top-level publish directory of the workflow: ```groovy -publish { +output { directory 'results' // ... @@ -493,7 +489,7 @@ It is optional, and it defaults to the launch directory (`workflow.launchDir`). A publish target is a name with a specific publish configuration. By default, when a channel is published to a target in the `publish:` section of a process or workflow, the target name is used as the publish path. -For example, given the following publish definition: +For example, given the following output definition: ```groovy workflow { @@ -505,7 +501,7 @@ workflow { ch_bar >> 'bar/' } -publish { +output { directory 'results' } ``` @@ -539,12 +535,12 @@ workflow { } ``` -Publish targets can be customized in the publish definition using a set of options similar to the {ref}`process-publishdir` directive. +Publish targets can be customized in the output definition using a set of options similar to the {ref}`process-publishdir` directive. For example: ```groovy -publish { +output { directory 'results' mode 'copy' @@ -605,6 +601,9 @@ Available options: `'standard'` : Overwrite existing files when the file size or last modified timestamp is different. +`path` +: Specify the publish path relative to the output directory (default: the target name). Can only be specified within a target definition. + `storageClass` : *Currently only supported for S3.* : Specify the storage class for published files. 
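As a brief sketch of the target-level `path` option restored here (target and directory names are illustrative; the behaviour follows the option description above):

```groovy
output {
    directory 'results'

    'bar' {
        // publish the 'bar' target under results/barbar instead of results/bar
        path 'barbar'
    }
}
```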
@@ -634,7 +633,7 @@ workflow { ch_foo >> 'foo/' } -publish { +output { directory 'results' 'foo/' { diff --git a/modules/nextflow/src/main/groovy/nextflow/NF.groovy b/modules/nextflow/src/main/groovy/nextflow/NF.groovy index 3963ad4b85..4767e41d56 100644 --- a/modules/nextflow/src/main/groovy/nextflow/NF.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/NF.groovy @@ -64,8 +64,8 @@ class NF { NextflowMeta.instance.isStrictModeEnabled() } - static boolean isPublishDefinitionEnabled() { - NextflowMeta.instance.preview.publish + static boolean isOutputDefinitionEnabled() { + NextflowMeta.instance.preview.output } static boolean isRecurseEnabled() { diff --git a/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy b/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy index 2d804ab6b5..ac7eeec866 100644 --- a/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy @@ -41,7 +41,7 @@ class NextflowMeta { static class Preview implements Flags { @Deprecated volatile float dsl @Deprecated boolean strict - boolean publish + boolean output boolean recursion boolean topic @@ -56,10 +56,10 @@ class NextflowMeta { dsl = num } - void setPublish(Boolean publish) { - if( publish ) - log.warn "WORKFLOW PUBLISH DEFINITION IS A PREVIEW FEATURE - SYNTAX AND FUNCTIONALITY CAN CHANGE IN FUTURE RELEASES" - this.publish = publish + void setOutput(Boolean output) { + if( output ) + log.warn "WORKFLOW OUTPUT DEFINITION IS A PREVIEW FEATURE - SYNTAX AND FUNCTIONALITY CAN CHANGE IN FUTURE RELEASES" + this.output = output } void setRecursion(Boolean recursion) { diff --git a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy index 7f058b859d..8cd91d0a81 100644 --- a/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/ast/NextflowDSLImpl.groovy @@ -179,8 +179,8 @@ class NextflowDSLImpl implements ASTTransformation { super.visitMethodCallExpression(methodCall) } - else if( methodName == 'publish' && preCondition ) { - convertPublishDef(methodCall,sourceUnit) + else if( methodName == 'output' && preCondition ) { + convertOutputDef(methodCall,sourceUnit) super.visitMethodCallExpression(methodCall) } @@ -519,29 +519,29 @@ class NextflowDSLImpl implements ASTTransformation { } /** - * Transform targets in the workflow publish definition: + * Transform targets in the workflow output definition: * - * publish { + * output { * 'foo' { ... } * } * * becomes: * - * publish { + * output { * target('foo') { ... 
} * } * * @param methodCall * @param unit */ - protected void convertPublishDef(MethodCallExpression methodCall, SourceUnit unit) { - log.trace "Convert 'publish' ${methodCall.arguments}" + protected void convertOutputDef(MethodCallExpression methodCall, SourceUnit unit) { + log.trace "Convert 'output' ${methodCall.arguments}" assert methodCall.arguments instanceof ArgumentListExpression final arguments = (ArgumentListExpression)methodCall.arguments if( arguments.size() != 1 || arguments[0] !instanceof ClosureExpression ) { - syntaxError(methodCall, "Invalid publish definition") + syntaxError(methodCall, "Invalid output definition") return } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy index 0d5c3ecc9a..4dd6577726 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy @@ -123,13 +123,13 @@ abstract class BaseScript extends Script implements ExecutionContext { meta.addDefinition(workflow) } - protected publish(Closure closure) { - if( !NF.publishDefinitionEnabled ) - throw new IllegalStateException("Workflow publish definition requires the `nextflow.preview.publish` feature flag") + protected output(Closure closure) { + if( !NF.outputDefinitionEnabled ) + throw new IllegalStateException("Workflow output definition requires the `nextflow.preview.output` feature flag") if( !entryFlow ) - throw new IllegalStateException("Workflow publish definition must be defined after the anonymous workflow") + throw new IllegalStateException("Workflow output definition must be defined after the anonymous workflow") if( ExecutionStack.withinWorkflow() ) - throw new IllegalStateException("Workflow publish definition is not allowed within a workflow") + throw new IllegalStateException("Workflow output definition is not allowed within a workflow") publisher = new PublishDef(closure) } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy index 7dc2804b96..b79a3ca2df 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy @@ -159,11 +159,9 @@ class PublishDsl { } } - private Map publishOptions(String path, Map overrides) { + private Map publishOptions(String name, Map overrides) { if( !directory ) directory = Paths.get('.').complete() - if( path.startsWith('/') ) - throw new ScriptRuntimeException("Invalid publish target '${path}' -- it should be a relative path") final opts = defaults + overrides if( opts.containsKey('ignoreErrors') ) @@ -171,6 +169,9 @@ class PublishDsl { if( !opts.containsKey('overwrite') ) opts.overwrite = 'standard' + final path = opts.path as String ?: name + if( path.startsWith('/') ) + throw new ScriptRuntimeException("Invalid publish target path '${path}' -- it should be a relative path") opts.path = directory.resolve(path) return opts } @@ -212,6 +213,10 @@ class PublishDsl { setOption('overwrite', value) } + void path(String value) { + setOption('path', value) + } + void storageClass(String value) { setOption('storageClass', value) } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy index 7183b5f2e2..105d67a246 100644 --- 
a/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/WorkflowBinding.groovy @@ -165,11 +165,4 @@ class WorkflowBinding extends Binding { _publish_target(ch, name) } - void _publish_target(String oldTarget, String newTarget) { - final targets = owner.session.publishTargets - for( final source : targets.keySet() ) - if( targets[source] == oldTarget ) - targets[source] = newTarget - } - } diff --git a/tests/publish-dsl.nf b/tests/publish-def.nf similarity index 89% rename from tests/publish-dsl.nf rename to tests/publish-def.nf index 61f7ead3cc..22d9cea365 100644 --- a/tests/publish-dsl.nf +++ b/tests/publish-def.nf @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -nextflow.preview.publish = true +nextflow.preview.output = true params.save_foo = true @@ -58,7 +58,7 @@ process foo { ''' } -workflow align_combine_foo { +workflow { def input = Channel.of('alpha','beta','delta') align(input) @@ -75,18 +75,11 @@ workflow align_combine_foo { foo.out >> (params.save_foo ? 'data' : null) } -workflow { - align_combine_foo() - - publish: - 'more/data' >> 'more' -} - -publish { +output { directory 'results' + mode 'copy' 'data' { - mode 'link' index { path 'index.csv' mapper { val -> [filename: val] } @@ -94,8 +87,4 @@ publish { sep ',' } } - - 'more' { - mode 'copy' - } } From d504e45b5ff7e06b2651a079fe1724140c2f2966 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sun, 12 May 2024 16:39:32 +0200 Subject: [PATCH 41/47] Minor change [ci skip] Signed-off-by: Paolo Di Tommaso --- modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy b/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy index ac7eeec866..c45d8cecfe 100644 --- a/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy @@ -58,7 +58,7 @@ class NextflowMeta { void setOutput(Boolean output) { if( output ) - log.warn "WORKFLOW OUTPUT DEFINITION IS A PREVIEW FEATURE - SYNTAX AND FUNCTIONALITY CAN CHANGE IN FUTURE RELEASES" + log.warn "WORKFLOW OUTPUT DSL IS A PREVIEW FEATURE - SYNTAX AND FUNCTIONALITY CAN CHANGE IN FUTURE RELEASES" this.output = output } From 6185d841830213754b116d4c4c5539c00378f0e5 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 13 May 2024 07:02:33 -0500 Subject: [PATCH 42/47] Apply suggestions from review Signed-off-by: Ben Sherman --- .../nextflow/extension/PublishIndexOp.groovy | 2 + .../nextflow/extension/PublishOp.groovy | 1 + .../nextflow/processor/PublishDir.groovy | 4 +- .../groovy/nextflow/script/BaseScript.groovy | 4 +- .../groovy/nextflow/script/OutputDef.groovy | 47 ++++++++++++++++++ .../{PublishDef.groovy => OutputDsl.groovy} | 28 +---------- .../nextflow/script/WorkflowDefTest.groovy | 48 ------------------- .../src/main/nextflow/util/HashBuilder.java | 12 +++++ tests/{publish-def.nf => output-dsl.nf} | 0 9 files changed, 67 insertions(+), 79 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy rename modules/nextflow/src/main/groovy/nextflow/script/{PublishDef.groovy => OutputDsl.groovy} (93%) rename tests/{publish-def.nf => output-dsl.nf} (100%) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy 
b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy index 50b327c453..401423e99b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy @@ -25,6 +25,8 @@ import nextflow.Global import nextflow.Session import nextflow.util.CsvWriter /** + * Publish an index file describing all files from a source + * channel, including metdata. * * @author Ben Sherman */ diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index 0b330de2b3..45d1adc245 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -25,6 +25,7 @@ import nextflow.Global import nextflow.Session import nextflow.processor.PublishDir /** + * Publish files from a source channel. * * @author Paolo Di Tommaso */ diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy index 68b4173283..354e42ab9a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/PublishDir.groovy @@ -520,8 +520,8 @@ class PublishDir { return overwrite final hashMode = HashMode.of(overwrite) ?: HashMode.DEFAULT() - final sourceHash = new HashBuilder().withMode(hashMode).withBasePath(source.parent).with(source).build() - final targetHash = new HashBuilder().withMode(hashMode).withBasePath(target.parent).with(target).build() + final sourceHash = HashBuilder.hashPath(source, source.parent, hashMode) + final targetHash = HashBuilder.hashPath(target, target.parent, hashMode) log.trace "comparing source and target with mode=${overwrite}, source=${sourceHash}, target=${targetHash}, should overwrite=${sourceHash != targetHash}" return sourceHash != targetHash } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy index 4dd6577726..5ec823b5a8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/BaseScript.groovy @@ -42,7 +42,7 @@ abstract class BaseScript extends Script implements ExecutionContext { private WorkflowDef entryFlow - private PublishDef publisher + private OutputDef publisher @Lazy InputStream stdin = { System.in }() @@ -131,7 +131,7 @@ abstract class BaseScript extends Script implements ExecutionContext { if( ExecutionStack.withinWorkflow() ) throw new IllegalStateException("Workflow output definition is not allowed within a workflow") - publisher = new PublishDef(closure) + publisher = new OutputDef(closure) } protected IncludeDef include( IncludeDef include ) { diff --git a/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy new file mode 100644 index 0000000000..73de6e16ee --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/script/OutputDef.groovy @@ -0,0 +1,47 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.script + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import groovyx.gpars.dataflow.DataflowWriteChannel +/** + * Models the workflow output definition + * + * @author Ben Sherman + */ +@Slf4j +@CompileStatic +class OutputDef { + + private Closure closure + + OutputDef(Closure closure) { + this.closure = closure + } + + void run(Map targets) { + final dsl = new OutputDsl() + final cl = (Closure)closure.clone() + cl.setDelegate(dsl) + cl.setResolveStrategy(Closure.DELEGATE_FIRST) + cl.call() + + dsl.build(targets) + } + +} diff --git a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy b/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy similarity index 93% rename from modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy rename to modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy index b79a3ca2df..a111babfaa 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/PublishDef.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy @@ -27,32 +27,6 @@ import nextflow.extension.CH import nextflow.extension.MixOp import nextflow.extension.PublishOp import nextflow.extension.PublishIndexOp -/** - * Models the workflow publish definition - * - * @author Ben Sherman - */ -@Slf4j -@CompileStatic -class PublishDef { - - private Closure closure - - PublishDef(Closure closure) { - this.closure = closure - } - - void run(Map targets) { - final dsl = new PublishDsl() - final cl = (Closure)closure.clone() - cl.setDelegate(dsl) - cl.setResolveStrategy(Closure.DELEGATE_FIRST) - cl.call() - - dsl.build(targets) - } - -} /** * Implements the DSL for publishing workflow outputs @@ -61,7 +35,7 @@ class PublishDef { */ @Slf4j @CompileStatic -class PublishDsl { +class OutputDsl { private Map publishConfigs = [:] diff --git a/modules/nextflow/src/test/groovy/nextflow/script/WorkflowDefTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/WorkflowDefTest.groovy index 00a0f9dedf..4607a45619 100644 --- a/modules/nextflow/src/test/groovy/nextflow/script/WorkflowDefTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/script/WorkflowDefTest.groovy @@ -161,54 +161,6 @@ class WorkflowDefTest extends Dsl2Spec { } - - def 'should capture publish defs' () { - - given: - def config = new CompilerConfiguration() - config.setScriptBaseClass(TestScript.class.name) - config.addCompilationCustomizers( new ASTTransformationCustomizer(NextflowDSL)) - - def SCRIPT = ''' - - workflow { - publish: - foo - bar to: 'some/path' - baz.out to: 'other/path' - main: - x = 1 - } - ''' - - when: - def script = (TestScript)new GroovyShell(new ScriptBinding(), config).parse(SCRIPT).run() - then: - thrown(MultipleCompilationErrorsException) - } - - def 'should not allow publish is sub-workflow' () { - - given: - def config = new CompilerConfiguration() - config.setScriptBaseClass(TestScript.class.name) - config.addCompilationCustomizers( new ASTTransformationCustomizer(NextflowDSL)) - - def SCRIPT = ''' - - workflow alpha { - publish: foo - main: - x = 1 - } - ''' - - when: 
- new GroovyShell(config).parse(SCRIPT) - then: - thrown(MultipleCompilationErrorsException) - } - def 'should report malformed workflow block' () { given: diff --git a/modules/nf-commons/src/main/nextflow/util/HashBuilder.java b/modules/nf-commons/src/main/nextflow/util/HashBuilder.java index 838aed0cdb..46c3fedf84 100644 --- a/modules/nf-commons/src/main/nextflow/util/HashBuilder.java +++ b/modules/nf-commons/src/main/nextflow/util/HashBuilder.java @@ -213,6 +213,18 @@ public static Hasher hasher( Hasher hasher, Object value, HashMode mode ) { .getHasher(); } + /** + * Hash a file using only the relative file name instead of + * the absolute file path. + * + * @param path + * @param basePath + * @param mode + */ + public static HashCode hashPath(Path path, Path basePath, HashMode mode) { + return new HashBuilder().withMode(mode).withBasePath(basePath).with(path).build(); + } + /** * Hashes the specified file * diff --git a/tests/publish-def.nf b/tests/output-dsl.nf similarity index 100% rename from tests/publish-def.nf rename to tests/output-dsl.nf From 79cc68c6dae83370f3e5535fc4fb52fba0dbb97e Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 15 May 2024 12:22:52 +0200 Subject: [PATCH 43/47] Fix typo [ci skip] Signed-off-by: Paolo Di Tommaso --- .../src/main/groovy/nextflow/extension/PublishIndexOp.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy index 401423e99b..686a9d07d8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy @@ -26,7 +26,7 @@ import nextflow.Session import nextflow.util.CsvWriter /** * Publish an index file describing all files from a source - * channel, including metdata. + * channel, including metadata. 
* * @author Ben Sherman */ From ef4305d2b6244977b883758ec9267e72a1b2479e Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 15 May 2024 11:27:24 -0500 Subject: [PATCH 44/47] Add shorthand for publishing single file to index Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/extension/PublishIndexOp.groovy | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy index 686a9d07d8..acecd9e9b4 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy @@ -84,6 +84,9 @@ class PublishIndexOp { } protected Object normalizePaths(value) { + if( value instanceof Path ) + return List.of(normalizePath(value)) + if( value instanceof Collection ) { return value.collect { el -> if( el instanceof Path ) @@ -104,7 +107,7 @@ class PublishIndexOp { } } - throw new IllegalArgumentException("Index file record must be a list or map: ${value} [${value.class.simpleName}]") + throw new IllegalArgumentException("Index file record must be a list, map, or file: ${value} [${value.class.simpleName}]") } private Path normalizePath(Path path) { From b8cf823291acf6dc0dda37cb6cd05729cc4d9879 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 16 May 2024 00:16:21 -0500 Subject: [PATCH 45/47] Fold PublishIndexOp into PublishOp, add test for OutputDsl, Signed-off-by: Ben Sherman --- .../nextflow/extension/PublishIndexOp.groovy | 144 ------------------ .../nextflow/extension/PublishOp.groovy | 91 ++++++++++- .../groovy/nextflow/script/OutputDsl.groovy | 30 ++-- .../nextflow/extension/PublishOpTest.groovy | 71 --------- .../nextflow/script/OutputDslTest.groovy | 85 +++++++++++ 5 files changed, 190 insertions(+), 231 deletions(-) delete mode 100644 modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy delete mode 100644 modules/nextflow/src/test/groovy/nextflow/extension/PublishOpTest.groovy create mode 100644 modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy deleted file mode 100644 index acecd9e9b4..0000000000 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishIndexOp.groovy +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright 2013-2024, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package nextflow.extension - -import java.nio.file.Path - -import groovy.transform.CompileStatic -import groovy.util.logging.Slf4j -import groovyx.gpars.dataflow.DataflowReadChannel -import nextflow.Global -import nextflow.Session -import nextflow.util.CsvWriter -/** - * Publish an index file describing all files from a source - * channel, including metadata. 
- * - * @author Ben Sherman - */ -@Slf4j -@CompileStatic -class PublishIndexOp { - - private DataflowReadChannel source - - private Path basePath - - private Path path - - private Closure mapper - - private /* boolean | List */ header = false - - private String sep = ',' - - private List records = [] - - private Session getSession() { Global.session as Session } - - PublishIndexOp(DataflowReadChannel source, Path basePath, String indexPath, Map opts) { - this.source = source - this.basePath = basePath - this.path = basePath.resolve(indexPath) - if( opts.mapper ) - this.mapper = opts.mapper as Closure - if( opts.header != null ) - this.header = opts.header - if( opts.sep ) - this.sep = opts.sep as String - } - - void apply() { - final events = new HashMap(2) - events.onNext = this.&onNext - events.onComplete = this.&onComplete - DataflowHelper.subscribeImpl(source, events) - } - - protected void onNext(value) { - final record = mapper != null ? mapper.call(value) : value - final normalized = normalizePaths(record) - log.trace "Normalized record for index file: ${normalized}" - records << normalized - } - - protected void onComplete(nope) { - if( records.size() == 0 ) - return - log.trace "Saving records to index file: ${records}" - new CsvWriter(header: header, sep: sep).apply(records, path) - } - - protected Object normalizePaths(value) { - if( value instanceof Path ) - return List.of(normalizePath(value)) - - if( value instanceof Collection ) { - return value.collect { el -> - if( el instanceof Path ) - return normalizePath(el) - if( el instanceof Collection ) - return normalizePaths(el) - return el - } - } - - if( value instanceof Map ) { - return value.collectEntries { k, v -> - if( v instanceof Path ) - return List.of(k, normalizePath(v)) - if( v instanceof Collection ) - return List.of(k, normalizePaths(v)) - return List.of(k, v) - } - } - - throw new IllegalArgumentException("Index file record must be a list, map, or file: ${value} [${value.class.simpleName}]") - } - - private Path normalizePath(Path path) { - final sourceDir = getTaskDir(path) - return basePath.resolve(sourceDir.relativize(path)) - } - - /** - * Given a path try to infer the task directory to which the path below - * ie. the directory starting with a workflow work dir and having at lest - * two sub-directories eg work-dir/xx/yyyyyy/etc - * - * @param path - */ - protected Path getTaskDir(Path path) { - if( path == null ) - return null - return getTaskDir0(path, session.workDir.resolve('tmp')) - ?: getTaskDir0(path, session.workDir) - ?: getTaskDir0(path, session.bucketDir) - } - - private Path getTaskDir0(Path file, Path base) { - if( base == null ) - return null - if( base.fileSystem != file.fileSystem ) - return null - final len = base.nameCount - if( file.startsWith(base) && file.getNameCount() > len+2 ) - return base.resolve(file.subpath(len,len+2)) - return null - } - -} diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index 45d1adc245..68f4c8dc63 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -24,6 +24,7 @@ import groovyx.gpars.dataflow.DataflowReadChannel import nextflow.Global import nextflow.Session import nextflow.processor.PublishDir +import nextflow.util.CsvWriter /** * Publish files from a source channel. 
* @@ -37,6 +38,12 @@ class PublishOp { private PublishDir publisher + private Path targetDir + + private IndexOpts indexOpts + + private List indexRecords = [] + private volatile boolean complete private Session getSession() { Global.session as Session } @@ -44,6 +51,9 @@ class PublishOp { PublishOp(DataflowReadChannel source, Map opts) { this.source = source this.publisher = PublishDir.create(opts) + this.targetDir = opts.path as Path + if( opts.index ) + this.indexOpts = new IndexOpts(targetDir, opts.index as Map) } boolean getComplete() { complete } @@ -64,13 +74,32 @@ class PublishOp { final files = entry.value publisher.apply(files, sourceDir) } + + if( indexOpts ) { + final record = indexOpts.mapper != null ? indexOpts.mapper.call(value) : value + final normalized = normalizePaths(record) + log.trace "Normalized record for index file: ${normalized}" + indexRecords << normalized + } } protected void onComplete(nope) { + if( indexOpts && indexRecords.size() > 0 ) { + log.trace "Saving records to index file: ${indexRecords}" + new CsvWriter(header: indexOpts.header, sep: indexOpts.sep).apply(indexRecords, indexOpts.path) + session.notifyFilePublish(indexOpts.path) + } + log.trace "Publish operator complete" this.complete = true } + /** + * Extract files from a received value for publishing. + * + * @param result + * @param value + */ protected Map> collectFiles(Map> result, value) { if( value instanceof Path ) { final sourceDir = getTaskDir(value) @@ -86,9 +115,47 @@ class PublishOp { } /** - * Given a path try to infer the task directory to which the path below - * ie. the directory starting with a workflow work dir and having at lest - * two sub-directories eg work-dir/xx/yyyyyy/etc + * Normalize the paths in a record by converting + * work directory paths to publish paths. + * + * @param value + */ + protected Object normalizePaths(value) { + if( value instanceof Path ) + return List.of(normalizePath(value)) + + if( value instanceof Collection ) { + return value.collect { el -> + if( el instanceof Path ) + return normalizePath(el) + if( el instanceof Collection ) + return normalizePaths(el) + return el + } + } + + if( value instanceof Map ) { + return value.collectEntries { k, v -> + if( v instanceof Path ) + return List.of(k, normalizePath(v)) + if( v instanceof Collection ) + return List.of(k, normalizePaths(v)) + return List.of(k, v) + } + } + + throw new IllegalArgumentException("Index file record must be a list, map, or file: ${value} [${value.class.simpleName}]") + } + + private Path normalizePath(Path path) { + final sourceDir = getTaskDir(path) + return targetDir.resolve(sourceDir.relativize(path)) + } + + /** + * Try to infer the parent task directory to which a path belongs. It + * should be a directory starting with a session work dir and having + * at lest two sub-directories, e.g. 
work/ab/cdef/etc * * @param path */ @@ -111,4 +178,22 @@ class PublishOp { return null } + static class IndexOpts { + Path path + Closure mapper + def /* boolean | List */ header = false + String sep = ',' + + IndexOpts(Path targetDir, Map opts) { + this.path = targetDir.resolve(opts.path as String) + + if( opts.mapper ) + this.mapper = opts.mapper as Closure + if( opts.header != null ) + this.header = opts.header + if( opts.sep ) + this.sep = opts.sep as String + } + } + } diff --git a/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy b/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy index a111babfaa..5a13db1539 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/OutputDsl.groovy @@ -26,7 +26,7 @@ import nextflow.exception.ScriptRuntimeException import nextflow.extension.CH import nextflow.extension.MixOp import nextflow.extension.PublishOp -import nextflow.extension.PublishIndexOp +import nextflow.file.FileHelper /** * Implements the DSL for publishing workflow outputs @@ -43,10 +43,12 @@ class OutputDsl { private Map defaults = [:] + private volatile List ops = [] + void directory(String directory) { if( this.directory ) throw new ScriptRuntimeException("Publish directory cannot be defined more than once in the workflow publish definition") - this.directory = (directory as Path).complete() + this.directory = FileHelper.toCanonicalPath(directory) } void contentType(String value) { @@ -120,22 +122,13 @@ class OutputDsl { : sources.first() final opts = publishOptions(name, publishConfigs[name] ?: [:]) - new PublishOp(CH.getReadChannel(mixed), opts).apply() - - if( opts.index ) { - final basePath = opts.path as Path - final indexOpts = opts.index as Map - final indexPath = indexOpts.path as String - if( !indexPath ) - throw new ScriptRuntimeException("Index file definition for publish target '${name}' is missing `path` option") - new PublishIndexOp(CH.getReadChannel(mixed), basePath, indexPath, indexOpts).apply() - } + ops << new PublishOp(CH.getReadChannel(mixed), opts).apply() } } private Map publishOptions(String name, Map overrides) { if( !directory ) - directory = Paths.get('.').complete() + directory = FileHelper.toCanonicalPath('.') final opts = defaults + overrides if( opts.containsKey('ignoreErrors') ) @@ -147,9 +140,20 @@ class OutputDsl { if( path.startsWith('/') ) throw new ScriptRuntimeException("Invalid publish target path '${path}' -- it should be a relative path") opts.path = directory.resolve(path) + + if( opts.index && !(opts.index as Map).path ) + throw new ScriptRuntimeException("Index file definition for publish target '${name}' is missing `path` option") + return opts } + boolean getComplete() { + for( final op : ops ) + if( !op.complete ) + return false + return true + } + static class TargetDsl { private Map opts = [:] diff --git a/modules/nextflow/src/test/groovy/nextflow/extension/PublishOpTest.groovy b/modules/nextflow/src/test/groovy/nextflow/extension/PublishOpTest.groovy deleted file mode 100644 index e8530c9e3b..0000000000 --- a/modules/nextflow/src/test/groovy/nextflow/extension/PublishOpTest.groovy +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright 2013-2024, Seqera Labs - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package nextflow.extension - -import java.nio.file.Files -import java.util.concurrent.TimeoutException - -import groovyx.gpars.dataflow.DataflowQueue -import nextflow.Channel -import nextflow.Global -import nextflow.Session -import test.BaseSpec -/** - * - * @author Paolo Di Tommaso - */ -class PublishOpTest extends BaseSpec { - - - def 'should publish files' () { - given: - def folder = Files.createTempDirectory('test') - def file1 = folder.resolve('file1.txt'); file1.text = 'Hello' - def file2 = folder.resolve('file2.txt'); file2.text = 'world' - def target = folder.resolve('target/dir') - - - def BASE = folder - def sess = Mock(Session) { - getWorkDir() >> BASE - getConfig() >> [:] - } - Global.session = sess - - and: - def ch = new DataflowQueue() - ch.bind(file1) - ch.bind(file2) - ch.bind(Channel.STOP) - - when: - def now = System.currentTimeMillis() - def op = new PublishOp(ch, [path:target, mode:'symlink']).apply() - while( !op.complete ) { - sleep 100 - if( System.currentTimeMillis() - now > 5_000 ) - throw new TimeoutException() - } - then: - target.resolve('file1.txt').text == 'Hello' - target.resolve('file2.txt').text == 'world' - - cleanup: - folder?.deleteDir() - } - -} diff --git a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy new file mode 100644 index 0000000000..125e344af0 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy @@ -0,0 +1,85 @@ +package nextflow.script + +import java.nio.file.Files +import java.util.concurrent.TimeoutException + +import groovyx.gpars.dataflow.DataflowQueue +import nextflow.Channel +import nextflow.Global +import nextflow.Session +import nextflow.SysEnv +import spock.lang.Specification +/** + * + * @author Ben Sherman + */ +class OutputDslTest extends Specification { + + def 'should publish workflow outputs'() { + given: + def root = Files.createTempDirectory('test') + def workDir = root.resolve('work') + def work1 = workDir.resolve('ab/1234'); Files.createDirectories(work1) + def work2 = workDir.resolve('cd/5678'); Files.createDirectories(work2) + def file1 = work1.resolve('file1.txt'); file1.text = 'Hello' + def file2 = work2.resolve('file2.txt'); file2.text = 'world' + def target = root.resolve('results') + and: + def session = Mock(Session) { + getConfig() >> [:] + getWorkDir() >> workDir + } + Global.session = session + and: + def ch1 = new DataflowQueue() + ch1.bind(file1) + ch1.bind(Channel.STOP) + and: + def ch2 = new DataflowQueue() + ch2.bind(file2) + ch2.bind(Channel.STOP) + and: + def targets = [ + (ch1): 'foo', + (ch2): 'bar' + ] + def dsl = new OutputDsl() + and: + SysEnv.push(NXF_FILE_ROOT: root.toString()) + + when: + dsl.directory('results') + dsl.mode('symlink') + dsl.overwrite(true) + dsl.target('bar') { + path('barbar') + index { + path 'index.csv' + } + } + dsl.build(targets) + + def now = System.currentTimeMillis() + while( !dsl.complete ) { + sleep 100 + if( System.currentTimeMillis() - now > 5_000 ) + throw new TimeoutException() + } + + then: + 
target.resolve('foo/file1.txt').text == 'Hello' + target.resolve('barbar/file2.txt').text == 'world' + target.resolve('barbar/index.csv').text == """\ + "${target}/barbar/file2.txt" + """.stripIndent() + and: + 1 * session.notifyFilePublish(target.resolve('foo/file1.txt'), file1) + 1 * session.notifyFilePublish(target.resolve('barbar/file2.txt'), file2) + 1 * session.notifyFilePublish(target.resolve('barbar/index.csv')) + + cleanup: + SysEnv.pop() + root?.deleteDir() + } + +} From 02ba0c7e3e1020ca097c2cd3053b058a16c25436 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 16 May 2024 01:38:20 -0500 Subject: [PATCH 46/47] Update index file default for single file Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/extension/PublishOp.groovy | 6 ++++-- .../src/test/groovy/nextflow/script/OutputDslTest.groovy | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index 68f4c8dc63..96779801dc 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -121,8 +121,10 @@ class PublishOp { * @param value */ protected Object normalizePaths(value) { - if( value instanceof Path ) - return List.of(normalizePath(value)) + if( value instanceof Path ) { + final target = normalizePath(value) + return List.of(targetDir.relativize(target), target) + } if( value instanceof Collection ) { return value.collect { el -> diff --git a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy index 125e344af0..ce3705c44a 100644 --- a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy @@ -70,7 +70,7 @@ class OutputDslTest extends Specification { target.resolve('foo/file1.txt').text == 'Hello' target.resolve('barbar/file2.txt').text == 'world' target.resolve('barbar/index.csv').text == """\ - "${target}/barbar/file2.txt" + "file2.txt","${target}/barbar/file2.txt" """.stripIndent() and: 1 * session.notifyFilePublish(target.resolve('foo/file1.txt'), file1) From 0db73c87de7a0f6224bad6fadcd26057aae56754 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 16 May 2024 14:59:34 -0500 Subject: [PATCH 47/47] Use file base name for default index Signed-off-by: Ben Sherman --- .../src/main/groovy/nextflow/extension/PublishOp.groovy | 3 +-- .../src/test/groovy/nextflow/script/OutputDslTest.groovy | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy index 96779801dc..5e1e7ae181 100644 --- a/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/extension/PublishOp.groovy @@ -122,8 +122,7 @@ class PublishOp { */ protected Object normalizePaths(value) { if( value instanceof Path ) { - final target = normalizePath(value) - return List.of(targetDir.relativize(target), target) + return List.of(value.getBaseName(), normalizePath(value)) } if( value instanceof Collection ) { diff --git a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy index ce3705c44a..dabf4c6212 100644 --- 
a/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/script/OutputDslTest.groovy @@ -70,7 +70,7 @@ class OutputDslTest extends Specification { target.resolve('foo/file1.txt').text == 'Hello' target.resolve('barbar/file2.txt').text == 'world' target.resolve('barbar/index.csv').text == """\ - "file2.txt","${target}/barbar/file2.txt" + "file2","${target}/barbar/file2.txt" """.stripIndent() and: 1 * session.notifyFilePublish(target.resolve('foo/file1.txt'), file1)
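For reference, a condensed sketch of how the final `tests/output-dsl.nf` exercises the output DSL at the end of this series (the workflow body is abbreviated; process definitions and intermediate calls are as in the test script):

```groovy
nextflow.preview.output = true

params.save_foo = true

workflow {
    def input = Channel.of('alpha','beta','delta')
    align(input)
    // ... my_combine and foo invocations as in the test script ...

    publish:
    align.out >> 'data'
    my_combine.out >> 'more/data'
    foo.out >> (params.save_foo ? 'data' : null)
}

output {
    directory 'results'
    mode 'copy'

    'data' {
        index {
            path 'index.csv'
            mapper { val -> [filename: val] }
            header true
            sep ','
        }
    }
}
```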