From 4970b54675b85591cc3da778d136bfb915bd9e65 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 20 Mar 2024 14:03:40 +0100 Subject: [PATCH 01/18] simple steps for the transition to an operator --- .../main/nextflow/validation/SchemaValidator.groovy | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy index 6d755574..5ab0573f 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy @@ -6,6 +6,7 @@ import groovy.json.JsonGenerator import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import groovyx.gpars.dataflow.DataflowWriteChannel +import groovyx.gpars.dataflow.DataflowReadChannel import java.nio.file.Files import java.nio.file.Path import java.util.regex.Matcher @@ -14,7 +15,7 @@ import nextflow.extension.CH import nextflow.Channel import nextflow.Global import nextflow.Nextflow -import nextflow.plugin.extension.Factory +import nextflow.plugin.extension.Operator import nextflow.plugin.extension.Function import nextflow.plugin.extension.PluginExtensionPoint import nextflow.script.WorkflowMetadata @@ -133,10 +134,11 @@ class SchemaValidator extends PluginExtensionPoint { m.findResult { k, v -> v instanceof Map ? 
findDeep(v, key) : null } } - @Factory + @Operator public DataflowWriteChannel fromSamplesheet( - Map options = null, - String samplesheetParam + final DataflowReadChannel source, + final Path schema, + final Map options = null, ) { def Map params = session.params From 4977ec097afac7fdc047c87fd4358f3858fa2320 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 20 Mar 2024 16:01:36 +0100 Subject: [PATCH 02/18] convert fromSamplesheet to an operator --- .../validation/SchemaValidator.groovy | 90 ++++++------------- 1 file changed, 27 insertions(+), 63 deletions(-) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy index 5ab0573f..646ea620 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy @@ -12,6 +12,7 @@ import java.nio.file.Path import java.util.regex.Matcher import java.util.regex.Pattern import nextflow.extension.CH +import nextflow.extension.DataflowHelper import nextflow.Channel import nextflow.Global import nextflow.Nextflow @@ -138,73 +139,36 @@ class SchemaValidator extends PluginExtensionPoint { public DataflowWriteChannel fromSamplesheet( final DataflowReadChannel source, final Path schema, - final Map options = null, + final Map options = null ) { - def Map params = session.params - - // Set defaults for optional inputs - def String schemaFilename = options?.containsKey('parameters_schema') ? 
options.parameters_schema as String : 'nextflow_schema.json' - def String baseDir = session.baseDir.toString() - - // Get the samplesheet schema from the parameters schema - def slurper = new JsonSlurper() - def Map parsed = (Map) slurper.parse( Path.of(Utils.getSchemaPath(baseDir, schemaFilename)) ) - def Map samplesheetValue = (Map) findDeep(parsed, samplesheetParam) - def Path samplesheetFile = params[samplesheetParam] as Path - - // Some safeguard to make sure the channel factory runs correctly - if (samplesheetValue == null) { - log.error """ -Parameter '--$samplesheetParam' was not found in the schema ($schemaFilename). -Unable to create a channel from it. - -Please make sure you correctly specified the inputs to `.fromSamplesheet`: - --------------------------------------------------------------------------------------- -Channel.fromSamplesheet("input") --------------------------------------------------------------------------------------- - -This would create a channel from params.input using the schema specified in the parameters JSON schema for this parameter. -""" - throw new SchemaValidationException("", []) - } - else if (samplesheetFile == null) { - log.error "Parameter '--$samplesheetParam' was not provided. Unable to create a channel from it." - throw new SchemaValidationException("", []) - } - else if (!samplesheetValue.containsKey('schema')) { - log.error "Parameter '--$samplesheetParam' does not contain a schema in the parameter schema ($schemaFilename). Unable to create a channel from it." 
- throw new SchemaValidationException("", []) - } - - // Convert to channel - final channel = CH.create() - def List arrayChannel = [] - try { - def Path schemaFile = Path.of(Utils.getSchemaPath(baseDir, samplesheetValue['schema'].toString())) - def SamplesheetConverter converter = new SamplesheetConverter(samplesheetFile, schemaFile) - arrayChannel = converter.convertToList() - } catch (Exception e) { - log.error( - """ Following error has been found during samplesheet conversion: - ${e} - ${e.getStackTrace().join("\n\t")} - -Please run validateParameters() first before trying to convert a samplesheet to a channel. -Reference: https://nextflow-io.github.io/nf-schema/parameters/validation/ - -Also make sure that the same schema is used for validation and conversion of the samplesheet -""" as String - ) - } - - session.addIgniter { + def params = session.params + final target = CH.createBy(source) + final validator = new JsonSchemaValidator() + final next = { + def JSONArray samplesheet = Utils.fileToJsonArray(it as Path, schema) + def List validationErrors = validator.validate(samplesheet, schema.text) + this.errors.addAll(validationErrors) + if (validationErrors) { + def Boolean useMonochromeLogs = options?.containsKey('monochrome_logs') ? options.monochrome_logs as Boolean : + params.monochrome_logs ? params.monochrome_logs as Boolean : + params.monochromeLogs ? 
params.monochromeLogs as Boolean : + false + def colors = logColours(useMonochromeLogs) + def msg = "${colors.red}The following errors have been detected in ${it.toString()}:\n\n" + validationErrors.join('\n').trim() + "\n${colors.reset}\n" + log.error("Validation of samplesheet failed!") + throw new SchemaValidationException(msg, this.getErrors()) + } + def SamplesheetConverter converter = new SamplesheetConverter(it as Path, schema) + def List arrayChannel = converter.convertToList() arrayChannel.each { - channel.bind(it) + target.bind(it) } - channel.bind(Channel.STOP) } - return channel + final done = { + target.bind(Channel.STOP) + } + DataflowHelper.subscribeImpl(source, [onNext: next, onComplete: done]) + return target } From ba91212e272ee31c9332955426a5736fa4771f71 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 21 Mar 2024 09:33:30 +0100 Subject: [PATCH 03/18] move as much logic as possible to the SamplesheetConverter --- .../validation/SamplesheetConverter.groovy | 27 +++++- .../validation/SchemaValidator.groovy | 92 +------------------ .../src/main/nextflow/validation/Utils.groovy | 68 ++++++++++++++ 3 files changed, 97 insertions(+), 90 deletions(-) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy b/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy index 5fe8b21b..0ea25d31 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy @@ -3,9 +3,10 @@ package nextflow.validation import groovy.json.JsonSlurper import groovy.transform.CompileStatic import groovy.util.logging.Slf4j - import java.nio.file.Path +import org.json.JSONArray + import nextflow.Nextflow /** @@ -20,10 +21,14 @@ class SamplesheetConverter { private static Path samplesheetFile private static Path schemaFile + private static nextflow.script.ScriptBinding$ParamsMap params + private static Map options - 
SamplesheetConverter(Path samplesheetFile, Path schemaFile) { + SamplesheetConverter(Path samplesheetFile, Path schemaFile, nextflow.script.ScriptBinding$ParamsMap params, Map options) { this.samplesheetFile = samplesheetFile this.schemaFile = schemaFile + this.params = params + this.options = options } private static List rows = [] @@ -62,8 +67,24 @@ class SamplesheetConverter { /* Convert the samplesheet to a list of entries based on a schema */ - public static List convertToList() { + public static List validateAndConvertToList() { + + // Validate + final validator = new JsonSchemaValidator() + def JSONArray samplesheet = Utils.fileToJsonArray(this.samplesheetFile, this.schemaFile) + def List validationErrors = validator.validate(samplesheet, this.schemaFile.text) + if (validationErrors) { + def Boolean useMonochromeLogs = this.options?.containsKey('monochrome_logs') ? this.options.monochrome_logs as Boolean : + this.params.monochrome_logs ? this.params.monochrome_logs as Boolean : + this.params.monochromeLogs ? 
this.params.monochromeLogs as Boolean : + false + def colors = Utils.logColours(useMonochromeLogs) + def msg = "${colors.red}The following errors have been detected in ${this.samplesheetFile.toString()}:\n\n" + validationErrors.join('\n').trim() + "\n${colors.reset}\n" + log.error("Validation of samplesheet failed!") + throw new SchemaValidationException(msg, validationErrors) + } + // Convert def LinkedHashMap schemaMap = new JsonSlurper().parseText(this.schemaFile.text) as LinkedHashMap def List samplesheetList = Utils.fileToList(this.samplesheetFile, this.schemaFile) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy index 646ea620..863075fb 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy @@ -143,23 +143,9 @@ class SchemaValidator extends PluginExtensionPoint { ) { def params = session.params final target = CH.createBy(source) - final validator = new JsonSchemaValidator() final next = { - def JSONArray samplesheet = Utils.fileToJsonArray(it as Path, schema) - def List validationErrors = validator.validate(samplesheet, schema.text) - this.errors.addAll(validationErrors) - if (validationErrors) { - def Boolean useMonochromeLogs = options?.containsKey('monochrome_logs') ? options.monochrome_logs as Boolean : - params.monochrome_logs ? params.monochrome_logs as Boolean : - params.monochromeLogs ? 
params.monochromeLogs as Boolean : - false - def colors = logColours(useMonochromeLogs) - def msg = "${colors.red}The following errors have been detected in ${it.toString()}:\n\n" + validationErrors.join('\n').trim() + "\n${colors.reset}\n" - log.error("Validation of samplesheet failed!") - throw new SchemaValidationException(msg, this.getErrors()) - } - def SamplesheetConverter converter = new SamplesheetConverter(it as Path, schema) - def List arrayChannel = converter.convertToList() + def SamplesheetConverter converter = new SamplesheetConverter(it as Path, schema, params, options) + def List arrayChannel = converter.validateAndConvertToList() arrayChannel.each { target.bind(it) } @@ -304,7 +290,7 @@ class SchemaValidator extends PluginExtensionPoint { } // Colors - def colors = logColours(useMonochromeLogs) + def colors = Utils.logColours(useMonochromeLogs) // Validate List validationErrors = validator.validate(paramsJSON, schema_string) @@ -373,7 +359,7 @@ class SchemaValidator extends PluginExtensionPoint { params.monochromeLogs ? params.monochromeLogs as Boolean : false - def colors = logColours(useMonochromeLogs) + def colors = Utils.logColours(useMonochromeLogs) Integer num_hidden = 0 String output = '' output += 'Typical pipeline command:\n\n' @@ -553,7 +539,7 @@ class SchemaValidator extends PluginExtensionPoint { params.monochromeLogs ? params.monochromeLogs as Boolean : false - def colors = logColours(useMonochromeLogs) + def colors = Utils.logColours(useMonochromeLogs) String output = '' def LinkedHashMap params_map = paramsSummaryMap(workflow, parameters_schema: schemaFilename) def max_chars = paramsMaxChars(params_map) @@ -691,72 +677,4 @@ class SchemaValidator extends PluginExtensionPoint { } return max_chars } - - // - // ANSII Colours used for terminal logging - // - private static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? 
'' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? 
'' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } } diff --git a/plugins/nf-schema/src/main/nextflow/validation/Utils.groovy b/plugins/nf-schema/src/main/nextflow/validation/Utils.groovy index eea33003..79f2c4e6 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/Utils.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/Utils.groovy @@ -201,4 +201,72 @@ public class Utils { return "" } } + + // + // ANSII Colours used for terminal logging + // + public static Map logColours(Boolean monochrome_logs) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? 
'' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? 
'' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes + } } \ No newline at end of file From d28b63cb1e0e46335f5ad819598e7f51caf96548 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 21 Mar 2024 10:21:36 +0100 Subject: [PATCH 04/18] Make the operator more stable --- .../validation/SamplesheetConverter.groovy | 18 +++++++++++---- .../SchemaValidationException.groovy | 2 +- .../validation/SchemaValidator.groovy | 23 +++++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy b/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy index 0ea25d31..b0767f50 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy @@ -69,16 +69,24 @@ class SamplesheetConverter { */ public static List validateAndConvertToList() { + // Logging + def Boolean useMonochromeLogs = this.options?.containsKey('monochrome_logs') ? this.options.monochrome_logs as Boolean : + this.params.monochrome_logs ? this.params.monochrome_logs as Boolean : + this.params.monochromeLogs ? 
this.params.monochromeLogs as Boolean : + false + def colors = Utils.logColours(useMonochromeLogs) + + // Some checks before validating + if(!this.schemaFile.exists()) { + def msg = "${colors.red}JSON schema file ${this.schemaFile.toString()} does not exist\n${colors.reset}\n" + throw new SchemaValidationException(msg) + } + // Validate final validator = new JsonSchemaValidator() def JSONArray samplesheet = Utils.fileToJsonArray(this.samplesheetFile, this.schemaFile) def List validationErrors = validator.validate(samplesheet, this.schemaFile.text) if (validationErrors) { - def Boolean useMonochromeLogs = this.options?.containsKey('monochrome_logs') ? this.options.monochrome_logs as Boolean : - this.params.monochrome_logs ? this.params.monochrome_logs as Boolean : - this.params.monochromeLogs ? this.params.monochromeLogs as Boolean : - false - def colors = Utils.logColours(useMonochromeLogs) def msg = "${colors.red}The following errors have been detected in ${this.samplesheetFile.toString()}:\n\n" + validationErrors.join('\n').trim() + "\n${colors.reset}\n" log.error("Validation of samplesheet failed!") throw new SchemaValidationException(msg, validationErrors) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidationException.groovy b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidationException.groovy index f772ec68..987c2594 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidationException.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidationException.groovy @@ -14,7 +14,7 @@ class SchemaValidationException extends AbortOperationException { List getErrors() { errors } - SchemaValidationException(String message, List errors) { + SchemaValidationException(String message, List errors=[]) { super(message) this.errors = new ArrayList<>(errors) } diff --git a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy 
b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy index 863075fb..eb5ee89b 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy @@ -135,15 +135,38 @@ class SchemaValidator extends PluginExtensionPoint { m.findResult { k, v -> v instanceof Map ? findDeep(v, key) : null } } + @Operator + public DataflowWriteChannel fromSamplesheet( + final DataflowReadChannel source, + final String schema, + final Map options = null + ) { + def String fullPathSchema = Utils.getSchemaPath(session.baseDir.toString(), schema) + def Path schemaFile = Nextflow.file(fullPathSchema) as Path + return fromSamplesheet(source, schemaFile, options) + } + @Operator public DataflowWriteChannel fromSamplesheet( final DataflowReadChannel source, final Path schema, final Map options = null ) { + // Logging def params = session.params + def Boolean useMonochromeLogs = options?.containsKey('monochrome_logs') ? options.monochrome_logs as Boolean : + params.monochrome_logs ? params.monochrome_logs as Boolean : + params.monochromeLogs ? 
params.monochromeLogs as Boolean : + false + + def colors = Utils.logColours(useMonochromeLogs) + final target = CH.createBy(source) final next = { + if(!(it instanceof String || it instanceof Path)) { + def msg = "${colors.red}The .fromSamplesheet operator only takes a channel with one value per entry (either a String or Path type)\n${colors.reset}\n" + throw new SchemaValidationException(msg) + } def SamplesheetConverter converter = new SamplesheetConverter(it as Path, schema, params, options) def List arrayChannel = converter.validateAndConvertToList() arrayChannel.each { From 7da6e0615254c8d2b8638ae71289200e9cf2ea24 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 21 Mar 2024 10:46:10 +0100 Subject: [PATCH 05/18] add function equivalent to fromSamplesheet --- .../validation/SamplesheetConverter.groovy | 5 +++ .../validation/SchemaValidator.groovy | 39 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy b/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy index b0767f50..3fbe3c29 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SamplesheetConverter.groovy @@ -82,6 +82,11 @@ class SamplesheetConverter { throw new SchemaValidationException(msg) } + if(!this.samplesheetFile.exists()) { + def msg = "${colors.red}Samplesheet file ${this.samplesheetFile.toString()} does not exist\n${colors.reset}\n" + throw new SchemaValidationException(msg) + } + // Validate final validator = new JsonSchemaValidator() def JSONArray samplesheet = Utils.fileToJsonArray(this.samplesheetFile, this.schemaFile) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy index eb5ee89b..6ca776e0 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy +++ 
b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy @@ -135,6 +135,45 @@ class SchemaValidator extends PluginExtensionPoint { m.findResult { k, v -> v instanceof Map ? findDeep(v, key) : null } } + @Function + public List samplesheetToList( + final String samplesheet, + final String schema, + final Map options = null + ) { + return samplesheetToList(samplesheet as Path, schema, options) + } + + @Function + public List samplesheetToList( + final Path samplesheet, + final String schema, + final Map options = null + ) { + def String fullPathSchema = Utils.getSchemaPath(session.baseDir.toString(), schema) + def Path schemaFile = Nextflow.file(fullPathSchema) as Path + return samplesheetToList(samplesheet, schemaFile, options) + } + + @Function + public List samplesheetToList( + final String samplesheet, + final Path schema, + final Map options = null + ) { + return samplesheetToList(samplesheet as Path, schema, options) + } + + @Function + public List samplesheetToList( + final Path samplesheet, + final Path schema, + final Map options = null + ) { + def SamplesheetConverter converter = new SamplesheetConverter(samplesheet, schema, session.params, options) + return converter.validateAndConvertToList() + } + @Operator public DataflowWriteChannel fromSamplesheet( final DataflowReadChannel source, From 3772bea6d1f94f294317a85a27f784b93127def0 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 9 Apr 2024 11:33:32 +0200 Subject: [PATCH 06/18] update tests --- .../SamplesheetConverterTest.groovy | 153 +++++++++++++----- 1 file changed, 111 insertions(+), 42 deletions(-) diff --git a/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy b/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy index 1c7fd250..bed7b5d8 100644 --- a/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy +++ b/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy @@ 
-61,10 +61,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/correct.csv' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view().first().map {println(it[0].getClass())} + Channel.of("src/testResources/correct.csv") + .fromSamplesheet("src/testResources/schema_input.json") + .view() } ''' @@ -88,10 +88,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/correct_quoted.csv' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view().first().map {println(it[0].getClass())} + Channel.fromPath('src/testResources/correct_quoted.csv') + .fromSamplesheet("src/testResources/schema_input.json") + .view() } ''' @@ -115,10 +115,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/correct.tsv' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view() + Channel.of('src/testResources/correct.tsv') + .fromSamplesheet(file("src/testResources/schema_input.json", checkIfExists:true)) + .view() } ''' @@ -142,10 +142,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/correct.yaml' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view() + Channel.of('src/testResources/correct.yaml') + .fromSamplesheet("src/testResources/schema_input.json") + .view() } ''' @@ -169,10 +169,10 @@ class SamplesheetConverterTest 
extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/correct.json' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view() + Channel.of('src/testResources/correct.json') + .fromSamplesheet("src/testResources/schema_input.json") + .view() } ''' @@ -196,10 +196,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/correct_arrays.yaml' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter_arrays.json").view() + Channel.of('src/testResources/correct_arrays.yaml') + .fromSamplesheet("src/testResources/schema_input_with_arrays.json") + .view() } ''' @@ -222,10 +222,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/correct_arrays.json' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter_arrays.json").view() + Channel.of('src/testResources/correct_arrays.json') + .fromSamplesheet("src/testResources/schema_input_with_arrays.json") + .view() } ''' @@ -248,10 +248,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/no_header.csv' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_no_header.json").view() + Channel.of('src/testResources/no_header.csv') + .fromSamplesheet("src/testResources/no_header_schema.json") + .view() } ''' @@ -272,10 +272,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - 
params.input = 'src/testResources/no_header.yaml' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_no_header.json").view() + Channel.of('src/testResources/no_header.yaml') + .fromSamplesheet("src/testResources/no_header_schema.json") + .view() } ''' @@ -296,10 +296,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/no_header.json' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_no_header.json").view() + Channel.of('src/testResources/no_header.json') + .fromSamplesheet("src/testResources/no_header_schema.json") + .view() } ''' @@ -320,10 +320,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/extraFields.csv' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view() + Channel.of('src/testResources/extraFields.csv') + .fromSamplesheet("src/testResources/schema_input.json") + .view() } ''' @@ -351,10 +351,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/no_meta.csv' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_no_meta.json").view() + Channel.of('src/testResources/no_meta.csv') + .fromSamplesheet("src/testResources/no_meta_schema.json") + .view() } ''' @@ -375,10 +375,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/deeply_nested.yaml' - workflow { - Channel.fromSamplesheet("input", 
parameters_schema:"src/testResources/nextflow_schema_with_deeply_nested_samplesheet.json").view() + Channel.of('src/testResources/deeply_nested.yaml') + .fromSamplesheet("src/testResources/samplesheet_schema_deeply_nested.json") + .view() } ''' @@ -399,10 +399,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-schema' - params.input = 'src/testResources/deeply_nested.json' - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_deeply_nested_samplesheet.json").view() + Channel.of('src/testResources/deeply_nested.json') + .fromSamplesheet("src/testResources/samplesheet_schema_deeply_nested.json") + .view() } ''' @@ -417,4 +417,73 @@ class SamplesheetConverterTest extends Dsl2Spec{ noExceptionThrown() stdout.contains("[[mapMeta:this is in a map, arrayMeta:[metaString45, metaString478], otherArrayMeta:[metaString45, metaString478], meta:metaValue, metaMap:[entry1:entry1String, entry2:12.56]], [[string1, string2], string3, 1, 1, ${getRootString()}/file1.txt], [string4, string5, string6], [[string7, string8], [string9, string10]], test]" as String) } + + def 'samplesheetToList - String, String' () { + given: + def SCRIPT_TEXT = ''' + include { samplesheetToList } from 'plugin/nf-schema' + + println(samplesheetToList("src/testResources/correct.csv", "src/testResources/schema_input.json").join("\\n")) + ''' + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.startsWith('[[') ? 
it : null } + + then: + noExceptionThrown() + stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) + stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]") + stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") + stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) + } + + def 'samplesheetToList - Path, String' () { + given: + def SCRIPT_TEXT = ''' + include { samplesheetToList } from 'plugin/nf-schema' + + println(samplesheetToList(file("src/testResources/correct.csv", checkIfExists:true), "src/testResources/schema_input.json").join("\\n")) + ''' + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.startsWith('[[') ? 
it : null } + + then: + noExceptionThrown() + stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) + stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]") + stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") + stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) + } + + def 'samplesheetToList - String, Path' () { + given: + def SCRIPT_TEXT = ''' + include { samplesheetToList } from 'plugin/nf-schema' + + println(samplesheetToList("src/testResources/correct.csv", file("src/testResources/schema_input.json", checkIfExists:true)).join("\\n")) + ''' + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.startsWith('[[') ? 
it : null } + + then: + noExceptionThrown() + stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) + stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]") + stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") + stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) + } } From 1bfc475e84592ba6bebba5bad252182d2066df9f Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 11 Apr 2024 09:56:05 +0200 Subject: [PATCH 07/18] let Nextflow do the file handling --- .../main/nextflow/validation/SchemaValidator.groovy | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy index 6ca776e0..d3476efb 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy @@ -141,7 +141,8 @@ class SchemaValidator extends PluginExtensionPoint { final String schema, final Map options = null ) { - return samplesheetToList(samplesheet as Path, schema, options) + def Path samplesheetFile = Nextflow.file(samplesheet) as Path + return samplesheetToList(samplesheetFile, schema, 
options) } @Function @@ -161,7 +162,8 @@ class SchemaValidator extends PluginExtensionPoint { final Path schema, final Map options = null ) { - return samplesheetToList(samplesheet as Path, schema, options) + def Path samplesheetFile = Nextflow.file(samplesheet) as Path + return samplesheetToList(samplesheetFile, schema, options) } @Function @@ -206,7 +208,11 @@ class SchemaValidator extends PluginExtensionPoint { def msg = "${colors.red}The .fromSamplesheet operator only takes a channel with one value per entry (either a String or Path type)\n${colors.reset}\n" throw new SchemaValidationException(msg) } - def SamplesheetConverter converter = new SamplesheetConverter(it as Path, schema, params, options) + def Path samplesheet = it as Path + if(it instanceof String) { + samplesheet = Nextflow.file(it) as Path + } + def SamplesheetConverter converter = new SamplesheetConverter(samplesheet, schema, params, options) def List arrayChannel = converter.validateAndConvertToList() arrayChannel.each { target.bind(it) From 25f0b9c348865ed15facfbd9a688a8a7c7fb8525 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 11 Apr 2024 10:12:15 +0200 Subject: [PATCH 08/18] update docs with the changes --- CHANGELOG.md | 136 +----------------- README.md | 7 +- docs/background.md | 2 + docs/migration_guide.md | 43 ++++-- docs/nextflow_schema/create_schema.md | 2 + docs/samplesheets/examples.md | 12 +- docs/samplesheets/fromSamplesheet.md | 47 +----- docs/samplesheets/samplesheetToList.md | 24 ++++ docs/samplesheets/validate_sample_sheet.md | 4 + .../pipeline/assets/schema_input.json | 12 +- .../fromSamplesheetBasic/pipeline/main.nf | 2 +- .../pipeline/nextflow_schema.json | 39 ----- .../pipeline/assets/schema_input.json | 12 +- examples/fromSamplesheetMeta/pipeline/main.nf | 2 +- .../pipeline/nextflow_schema.json | 39 ----- .../pipeline/assets/schema_input.json | 12 +- .../fromSamplesheetOrder/pipeline/main.nf | 2 +- .../pipeline/nextflow_schema.json | 39 ----- 18 files 
changed, 90 insertions(+), 346 deletions(-) create mode 100644 docs/samplesheets/samplesheetToList.md delete mode 100644 examples/fromSamplesheetBasic/pipeline/nextflow_schema.json delete mode 100644 examples/fromSamplesheetMeta/pipeline/nextflow_schema.json delete mode 100644 examples/fromSamplesheetOrder/pipeline/nextflow_schema.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 520d2732..f09b0381 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,12 @@ # Version 2.0.0 - Kagoshima -:warning: This version contains a number of breaking changes. Please read the changelog carefully before upgrading. :warning: - -To migrate your schemas please follow the [migration guide](https://nextflow-io.github.io/nf-validation/latest/migration_guide/) +To migrate from nf-validation please follow the [migration guide](https://nextflow-io.github.io/nf-validation/latest/migration_guide/) ## New features - Added the `uniqueEntries` keyword. This keyword takes a list of strings corresponding to names of fields that need to be a unique combination. e.g. `uniqueEntries: ['sample', 'replicate']` will make sure that the combination of the `sample` and `replicate` fields is unique. ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) +- Added `samplesheetToList` which is the function equivalent of `.fromSamplesheet` [#3](https://github.com/nextflow-io/nf-schema/pull/3) ## Changes @@ -16,6 +15,7 @@ To migrate your schemas please follow the [migration guide](https://nextflow-io. - Removed all validation code from the `.fromSamplesheet()` channel factory. The validation is now solely done in the `validateParameters()` function. A custom error message will now be displayed if any error has been encountered during the conversion ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - Removed the `unique` keyword from the samplesheet schema. 
You should now use [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) or `uniqueEntries` instead ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - Removed the `skip_duplicate_check` option from the `fromSamplesheet()` channel factory and the `--validationSkipDuplicateCheck` parameter. You should now use the `uniqueEntries` or [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) keywords in the schema instead ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) +- `.fromSamplesheet()` now is a channel operator instead of a channel factory. It takes one required argument which can either be a string containing the relative path to the schema or a file object of the schema [#3](https://github.com/nextflow-io/nf-schema/pull/3) - `.fromSamplesheet()` now does dynamic typecasting instead of using the `type` fields in the JSON schema. This is done due to the complexity of `draft-2020-12` JSON schemas. This should not have that much impact but keep in mind that some types can be different between this and earlier versions because of this ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - `.fromSamplesheet()` will now set all missing values as `[]` instead of the type specific defaults (because of the changes in the previous point). This should not change that much as this will also result in `false` when used in conditions. ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) @@ -25,133 +25,3 @@ To migrate your schemas please follow the [migration guide](https://nextflow-io. - The `schema` keyword will now work in all schemas. 
([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - Improved the error messages ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - `.fromSamplesheet()` now supports deeply nested samplesheets ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - -# Version 1.1.3 - Asahikawa - -## Improvements - -- Added support for double quotes (`"`) in CSV and TSV samplesheets ([#134](https://github.com/nextflow-io/nf-validation/pull/134)) - -# Version 1.1.2 - Wakayama - -## Bug fixes - -- Fixed an issue with inputs using `file-path-pattern` where only one file was found (`Path` casting to `ArrayList` error) ([#132](https://github.com/nextflow-io/nf-validation/pull/132)) - -# Version 1.1.1 - Shoyu - -## Bug fixes - -- Fixed an issue where samplesheet with a lot of null values would take forever to validate ([#120](https://github.com/nextflow-io/nf-validation/pull/120)) => Thanks @awgymer for fixing this! -- Now YAML files are actually validated instead of skipped ([#124](https://github.com/nextflow-io/nf-validation/pull/120)) - -# Version 1.1.0 - Miso - -## Features - -- Add support for samplesheets with no header ([#115](https://github.com/nextflow-io/nf-validation/pull/115)) - -## Bug fixes - -- Floats and doubles should now be created when using the `number` type in the schema ([#113](https://github.com/nextflow-io/nf-validation/pull/113/)) -- When `0` is used as a default value in the schema, a `0` will now be used as the value in the `.fromSamplesheet()` channel instead of `null` ([#114](https://github.com/nextflow-io/nf-validation/pull/114)) - -## New features - -- Added `file-path-pattern` format to check every file fetched using a glob pattern. Using a glob is now also possible in the samplesheet and will create a list of all files found using that glob pattern. 
([#118](https://github.com/nextflow-io/nf-validation/pull/118)) - -# Version 1.0.0 - Tonkotsu - -The nf-validation plugin is now in production use across many pipelines and has (we hope) now reached a point of relative stability. The bump to major version v1.0.0 signifies that it is suitable for use in production pipelines. - -This version also introduces a small breaking change of syntax when providing optional arguments to the functions. You can now provide optional arguments such as the nextflow parameters schema path as: -`validateParameters(parameters_schema: 'my_file.json')` - -(previous syntax used positional arguments instead). - -## Bug fixes - -- The path to a custom parameters schema must be provided through a map '`parameters_schema: 'my_file.json'`' in `validateParameters()` and `paramsSummaryMap()` ([#108](https://github.com/nextflow-io/nf-validation/pull/108)) - -# Version 0.3.4 - -This version introduced a bug which made all pipeline runs using the function `validateParameters()` without providing any arguments fail. - -This bug causes Nextflow to exit with an error on launch for most pipelines. It should not be used. It was [removed](https://github.com/nextflow-io/plugins/pull/40) from the Nextflow Plugin registry to avoid breaking people's runs. 
- -### Bug fixes - -- Do not check S3 URL paths with `PathValidator` `FilePathValidator` and `DirectoryPathValidator` ([#106](https://github.com/nextflow-io/nf-validation/pull/106)) -- Make monochrome_logs an option in `paramsSummaryLog()`, `paramsSummaryMap()` and `paramsHelp()` instead of a global parameter ([#101](https://github.com/nextflow-io/nf-validation/pull/101)) - -# Version 0.3.3 - -### Bug fixes - -- Do not check if S3 URL paths exists to avoid AWS errors, and add a new parameter `validationS3PathCheck` ([#104](https://github.com/nextflow-io/nf-validation/pull/104)) - -# Version 0.3.2 - -### Bug fixes - -- Add parameters defined on the top level of the schema and within the definitions section as expected params ([#79](https://github.com/nextflow-io/nf-validation/pull/79)) -- Fix error when a parameter is not present in the schema and evaluates to false ([#89](https://github.com/nextflow-io/nf-validation/pull/89)) -- Changed the `schema_filename` option of `fromSamplesheet` to `parameters_schema` to make this option more clear to the user ([#91](https://github.com/nextflow-io/nf-validation/pull/91)) - -## Version 0.3.1 - -### Bug fixes - -- Don't check if path exists if param is not true ([#74](https://github.com/nextflow-io/nf-validation/pull/74)) -- Don't validate a file if the parameter evaluates to false ([#75](https://github.com/nextflow-io/nf-validation/pull/75)) - -## Version 0.3.0 - -### New features - -- Check that a sample sheet doesn't have duplicated entries by default. 
Can be disabled with `--validationSkipDuplicateCheck` ([#72](https://github.com/nextflow-io/nf-validation/pull/72)) - -### Bug fixes - -- Only validate a path if it is not null ([#50](https://github.com/nextflow-io/nf-validation/pull/50)) -- Only validate a file with a schema if the file path is provided ([#51](https://github.com/nextflow-io/nf-validation/pull/51)) -- Handle errors when sample sheet not provided or doesn't have a schema ([#56](https://github.com/nextflow-io/nf-validation/pull/56)) -- Silently ignore samplesheet fields that are not defined in samplesheet schema ([#59](https://github.com/nextflow-io/nf-validation/pull/59)) -- Correctly handle double-quoted fields containing commas in csv files by `.fromSamplesheet()` ([#63](https://github.com/nextflow-io/nf-validation/pull/63)) -- Print param name when path does not exist ([#65](https://github.com/nextflow-io/nf-validation/pull/65)) -- Fix file or directory does not exist error not printed when it was the only error in a samplesheet ([#65](https://github.com/nextflow-io/nf-validation/pull/65)) -- Do not return parameter in summary if it has no default in the schema and is set to 'false' ([#66](https://github.com/nextflow-io/nf-validation/pull/66)) -- Skip the validation of a file if the path is an empty string and improve error message when the path is invalid ([#69](https://github.com/nextflow-io/nf-validation/pull/69)) - -### Deprecated - -- The meta map of input channels is not an ImmutableMap anymore ([#68](https://github.com/nextflow-io/nf-validation/pull/68)). Reason: [Issue #52](https://github.com/nextflow-io/nf-validation/issues/52) - -## Version 0.2.1 - -### Bug fixes - -- Fixed a bug where `immutable_meta` option in `fromSamplesheet()` wasn't working when using `validateParameters()` first. (@nvnieuwk) - -## Version 0.2.0 - -### New features - -- Added a new [documentation site](https://nextflow-io.github.io/nf-validation/). 
(@ewels and @mashehu) -- Removed the `file-path-exists`, `directory-path-exists` and `path-exists` and added a [`exists`](https://nextflow-io.github.io/nf-validation/nextflow_schema/nextflow_schema_specification/#exists) parameter to the schema. (@mirpedrol) -- New [`errorMessage`](https://nextflow-io.github.io/nf-validation/nextflow_schema/nextflow_schema_specification/#errormessage) parameter for the schema which can be used to create custom error messages. (@mirpedrol) -- Samplesheet validation now happens in `validateParameters()` using the schema specified by the `schema` parameter in the parameters schema. (@mirpedrol) - -### Improvements - -- The `meta` maps are now immutable by default, see [`ImmutableMap`](https://nextflow-io.github.io/nf-validation/samplesheets/immutable_map/) for more info (@nvnieuwk) -- `validateAndConvertSamplesheet()` has been renamed to `fromSamplesheet()` -- Refactor `--schema_ignore_params` to `--validationSchemaIgnoreParams` - -### Bug fixes - -- Fixed a bug where an empty meta map would be created when no meta values are in the samplesheet schema. (@nvnieuwk) - -## Version 0.1.0 - -Initial release. diff --git a/README.md b/README.md index 8b6b13ac..1f0f432d 100644 --- a/README.md +++ b/README.md @@ -50,9 +50,8 @@ validateParameters() // Print summary of supplied parameters log.info paramsSummaryLog(workflow) -// Create a new channel of metadata from a sample sheet -// NB: `input` corresponds to `params.input` and associated sample sheet schema -ch_input = Channel.fromSamplesheet("input") +// Create a new channel of metadata from a sample sheet passed to the pipeline through the --input parameter +ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") ``` ## Dependencies @@ -62,7 +61,7 @@ ch_input = Channel.fromSamplesheet("input") ## Slack channel -There is a dedicated [nf-validation Slack channel](https://nfcore.slack.com/archives/C056RQB10LU) in the [Nextflow Slack workspace](nextflow.slack.com). 
+There is a dedicated [nf-validation Slack channel](https://nfcore.slack.com/archives/C056RQB10LU) in the [Nextflow Slack workspace](https://nextflow.slack.com). ## Credits diff --git a/docs/background.md b/docs/background.md index c0091c6c..95dc61bf 100644 --- a/docs/background.md +++ b/docs/background.md @@ -15,3 +15,5 @@ In addition to config params, a common best-practice for pipelines is to use a " Nextflow itself does not provide functionality to validate config parameters or parsed sample sheets. To bridge this gap, we developed code within the [nf-core community](https://nf-co.re/) to allow pipelines to work with a standard `nextflow_schema.json` file, written using the [JSON Schema](https://json-schema.org/) format. The file allows strict typing of parameter variables and inclusion of validation rules. The nf-schema plugin moves this code out of the nf-core template into a stand-alone package, to make it easier to use for the wider Nextflow community. It also incorporates a number of new features, such as native Groovy sample sheet validation. + +Earlier versions of the plugin can be found in the [nf-validation](https://github.com/nextflow-io/nf-validation) repository and can still be used in the pipeline. However the nf-validation plugin is no longer supported and all development has been moved to nf-schema. diff --git a/docs/migration_guide.md b/docs/migration_guide.md index 09d74ac4..0db3f9f9 100644 --- a/docs/migration_guide.md +++ b/docs/migration_guide.md @@ -1,21 +1,22 @@ --- title: Migration guide -description: Guide to migrate pipelines using nf-schema pre v2.0.0 to after v2.0.0 +description: Guide to migrate pipelines from nf-validation to nf-schema hide: - toc --- # Migration guide -This guide is intended to help you migrate your pipeline from older versions of the plugin to version 2.0.0 and later. +This guide is intended to help you migrate your pipeline from [nf-validation](https://github.com/nextflow-io/nf-validation) to nf-schema. 
## Major changes in the plugin -Following list shows the major breaking changes introduced in version 2.0.0: +The following list shows the major breaking changes introduced in nf-schema: 1. The JSON schema draft has been updated from `draft-07` to `draft-2020-12`. See [JSON Schema draft 2020-12 release notes](https://json-schema.org/draft/2020-12/release-notes) and [JSON schema draft 2019-09 release notes](https://json-schema.org/draft/2019-09/release-notes) for more information. -2. The `unique` keyword for samplesheet schemas has been removed. Please use [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) or [`uniqueEntries`](nextflow_schema/nextflow_schema_specification.md#uniqueentries) now instead. -3. The `dependentRequired` keyword now works as it's supposed to work in JSON schema. See [`dependentRequired`](https://json-schema.org/understanding-json-schema/reference/conditionals#dependentRequired) for more information +2. The `fromSamplesheet` channel factory has been converted to a channel operator. See [updating `fromSamplesheet`](#updating-fromsamplesheet) for more information. +3. The `unique` keyword for samplesheet schemas has been removed. Please use [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) or [`uniqueEntries`](nextflow_schema/nextflow_schema_specification.md#uniqueentries) now instead. +4. The `dependentRequired` keyword now works as it's supposed to work in JSON schema. See [`dependentRequired`](https://json-schema.org/understanding-json-schema/reference/conditionals#dependentRequired) for more information A full list of changes can be found in the [changelog](https://github.com/nextflow-io/nf-schema/blob/master/CHANGELOG.md). @@ -31,9 +32,27 @@ This will replace the old schema draft specification (`draft-07`) by the new one !!! note - Repeat this command for every JSON schema you use in your pipeline. e.g. 
for the default samplesheet schema: + Repeat this command for every JSON schema you use in your pipeline. e.g. for the default samplesheet schema in nf-core pipelines: `bash sed -i -e 's/http:\/\/json-schema.org\/draft-07\/schema/https:\/\/json-schema.org\/draft\/2020-12\/schema/g' -e 's/definitions/defs/g' assets/schema_input.json ` +Next you should update the `.fromSamplesheet` channel factory to the channel operator. The following tabs show the difference between the versions: + +=== "nf-validation" + + ```groovy + Channel.fromSamplesheet("input") + ``` + +=== "nf-schema" + + ```groovy + Channel.of(params.input).fromSamplesheet("path/to/samplesheet/schema") + ``` + +!!! note + + This change was necessary to make it possible for pipelines to be used as pluggable workflows. This also enables the validation and conversion of files generated by the pipeline. + If you are using any special features in your schemas, you will need to update your schemas manually. Please refer to the [JSON Schema draft 2020-12 release notes](https://json-schema.org/draft/2020-12/release-notes) and [JSON schema draft 2019-09 release notes](https://json-schema.org/draft/2019-09/release-notes) for more information. 
However here are some guides to the more common migration patterns: @@ -44,7 +63,7 @@ When you use `unique` in your schemas, you should update it to use `uniqueItems` If you used the `unique:true` field, you should update it to use `uniqueItems` like this: -=== "Before v2.0" +=== "nf-validation" ```json hl_lines="9" { @@ -62,7 +81,7 @@ If you used the `unique:true` field, you should update it to use `uniqueItems` l } ``` -=== "After v2.0" +=== "nf-schema" ```json hl_lines="12" { @@ -82,7 +101,7 @@ If you used the `unique:true` field, you should update it to use `uniqueItems` l If you used the `unique: ["field1", "field2"]` field, you should update it to use `uniqueEntries` like this: -=== "Before v2.0" +=== "nf-validation" ```json hl_lines="9" { @@ -100,7 +119,7 @@ If you used the `unique: ["field1", "field2"]` field, you should update it to us } ``` -=== "After v2.0" +=== "nf-schema" ```json hl_lines="12" { @@ -122,7 +141,7 @@ If you used the `unique: ["field1", "field2"]` field, you should update it to us When you use `dependentRequired` in your schemas, you should update it like this: -=== "Before v2.0" +=== "nf-validation" ```json hl_lines="12" { @@ -142,7 +161,7 @@ When you use `dependentRequired` in your schemas, you should update it like this } ``` -=== "After v2.0" +=== "nf-schema" ```json hl_lines="14 15 16" { diff --git a/docs/nextflow_schema/create_schema.md b/docs/nextflow_schema/create_schema.md index 2b248e5e..5fd9ea57 100644 --- a/docs/nextflow_schema/create_schema.md +++ b/docs/nextflow_schema/create_schema.md @@ -76,4 +76,6 @@ This web interface is where you should add detail to your schema, customising th There is currently no tooling to help you write sample sheet schema :anguished: + You can find an example in [Example sample sheet schema](sample_sheet_schema_examples.md) + Watch this space.. 
diff --git a/docs/samplesheets/examples.md b/docs/samplesheets/examples.md index 170bea9a..88c7a1c5 100644 --- a/docs/samplesheets/examples.md +++ b/docs/samplesheets/examples.md @@ -7,7 +7,7 @@ description: Examples of advanced sample sheet creation techniques. ## Introduction -Understanding channel structure and manipulation is critical for getting the most out of Nextflow. nf-schema helps initialise your channels from the text inputs to get you started, but further work might be required to fit your exact use case. In this page we run through some common cases for transforming the output of `.fromSamplesheet`. +Understanding channel structure and manipulation is critical for getting the most out of Nextflow. nf-schema helps initialise your channels from the text inputs to get you started, but further work might be required to fit your exact use case. In this page we run through some common cases for transforming the output of `.fromSamplesheet()`. ### Glossary @@ -104,7 +104,7 @@ val(meta), path(fastq_1), path(fastq_2) To manipulate each item within a channel, you should use the [Nextflow `.map()` operator](https://www.nextflow.io/docs/latest/operator.html#map). This will apply a function to each element of the channel in turn. Here, we convert the flat tuple into a tuple composed of a meta and a list of FASTQ files: ```groovy -Channel.fromSamplesheet("input") +Channel.of(params.input).fromSamplesheet("path/to/json/schema") .map { meta, fastq_1, fastq_2 -> tuple(meta, [ fastq_1, fastq_2 ]) } .set { input } @@ -122,7 +122,7 @@ ZCAT_FASTQS(input) For example, to remove the BED file from the channel created above, we could not return it from the map. 
Note the absence of the `bed` item in the return of the closure below: ```groovy -Channel.fromSamplesheet("input") +Channel.of(params.input).fromSamplesheet("path/to/json/schema") .map { meta, fastq_1, fastq_2, bed -> tuple(meta, fastq_1, fastq_2) } .set { input } @@ -136,7 +136,7 @@ In this way you can drop items from a channel. We could perform this twice to create one channel containing the FASTQs and one containing the BED files, however Nextflow has a native operator to separate channels called [`.multiMap()`](https://www.nextflow.io/docs/latest/operator.html#multimap). Here, we separate the FASTQs and BEDs into two separate channels using `multiMap`. Note, the channels are both contained in `input` and accessed as an attribute using dot notation: ```groovy -Channel.fromSamplesheet("input") +Channel.of(params.input).fromSamplesheet("path/to/json/schema") .multiMap { meta, fastq_1, fastq_2, bed -> fastq: tuple(meta, fastq_1, fastq_2) bed: tuple(meta, bed) @@ -163,7 +163,7 @@ This example shows a channel which can have entries for WES or WGS data. 
WES dat // Channel with four elements - see docs for examples params.input = "samplesheet.csv" -Channel.fromSamplesheet("input") +Channel.of(params.input).fromSamplesheet("path/to/json/schema") .branch { meta, fastq_1, fastq_2, bed -> // If BED does not exist WGS: !bed @@ -211,7 +211,7 @@ This example contains a channel where multiple samples can be in the same family params.input = "sample sheet.csv" -Channel.fromSamplesheet("input") +Channel.of(params.input).fromSamplesheet("path/to/json/schema") .tap { ch_raw } // Create a copy of the original channel .map { meta, txt -> [ meta.family ] } // Isolate the value to count on .reduce([:]) { counts, family -> // Creates a map like this: [family1:2, family2:1] diff --git a/docs/samplesheets/fromSamplesheet.md b/docs/samplesheets/fromSamplesheet.md index 6a4f56e3..9f21eb54 100644 --- a/docs/samplesheets/fromSamplesheet.md +++ b/docs/samplesheets/fromSamplesheet.md @@ -1,36 +1,19 @@ --- title: Create a channel -description: Channel factory to create a channel from a sample sheet. +description: Channel operator to create a channel from a sample sheet. --- # Create a channel from a sample sheet ## `fromSamplesheet` -This function validates and converts a sample sheet to a ready-to-use Nextflow channel. This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)). +This channel operator validates and converts a sample sheet to ready-to-use channel entries. This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)). -The function has one mandatory argument: the name of the parameter which specifies the input sample sheet. The parameter specified must have the format `file-path` and include additional field `schema`: +The operator has one mandatory argument: the path of the JSON schema file corresponding to the samplesheet. 
This can be either a string with the relative path (from the root of the pipeline) or a file object of the schema. -```json hl_lines="4" -{ - "type": "string", - "format": "file-path", - "schema": "assets/foo_schema.json" -} -``` - -The path specified in the `schema` key determines the JSON used for validation of the sample sheet. - -When using the `.fromSamplesheet` channel factory, one optional arguments can be used: - -- `parameters_schema`: File name for the pipeline parameters schema. (Default: `nextflow_schema.json`) ```groovy -Channel.fromSamplesheet('input') -``` - -```groovy -Channel.fromSamplesheet('input', parameters_schema: 'custom_nextflow_schema.json') +Channel.of("path/to/samplesheet").fromSamplesheet("path/to/json/schema") ``` ## Basic example @@ -59,12 +42,6 @@ In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/ --8<-- "examples/fromSamplesheetBasic/pipeline/nextflow.config" ``` -=== "nextflow_schema.json" - - ```json hl_lines="19" - --8<-- "examples/fromSamplesheetBasic/pipeline/nextflow_schema.json" - ``` - === "assets/schema_input.json" ```json @@ -91,7 +68,7 @@ In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/ === "assets/schema_input.json" - ```json hl_lines="10 15 20 33" + ```json hl_lines="10 15 20 25" --8<-- "examples/fromSamplesheetOrder/pipeline/assets/schema_input.json" ``` @@ -107,12 +84,6 @@ In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/ --8<-- "examples/fromSamplesheetOrder/pipeline/nextflow.config" ``` -=== "nextflow_schema.json" - - ```json - --8<-- "examples/fromSamplesheetOrder/pipeline/nextflow_schema.json" - ``` - ## Channel with meta map In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/fromSamplesheetMeta), we use the schema to mark two columns as meta fields. @@ -124,7 +95,7 @@ This returns a channel with a meta map. 
=== "assets/schema_input.json" - ```json hl_lines="14 38" + ```json hl_lines="14 30" --8<-- "examples/fromSamplesheetMeta/pipeline/assets/schema_input.json" ``` @@ -145,9 +116,3 @@ This returns a channel with a meta map. ```groovy --8<-- "examples/fromSamplesheetMeta/pipeline/nextflow.config" ``` - -=== "nextflow_schema.json" - - ```json - --8<-- "examples/fromSamplesheetMeta/pipeline/nextflow_schema.json" - ``` diff --git a/docs/samplesheets/samplesheetToList.md b/docs/samplesheets/samplesheetToList.md new file mode 100644 index 00000000..933e1f62 --- /dev/null +++ b/docs/samplesheets/samplesheetToList.md @@ -0,0 +1,24 @@ +--- +title: Create a list +description: Function to create a list from a sample sheet. +--- + +# Create a list from a sample sheet + +## `samplesheetToList` + +This function validates and converts a sample sheet to a list in a similar way to the [`fromSamplesheet`](./fromSamplesheet.md) channel operator. This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)). + +The function has two mandatory arguments: +1. The path to the samplesheet +2. The path to the JSON schema file corresponding to the samplesheet. + +These can be either a string with the relative path (from the root of the pipeline) or a file object of the schema. + +```groovy +samplesheetToList("path/to/samplesheet", "path/to/json/schema") +``` + +!!! note + + This function works very similar to the `fromSamplesheet` channel operator. See the [`fromSamplesheet` examples](./fromSamplesheet.md#basic-example) for some examples on how to use this function. diff --git a/docs/samplesheets/validate_sample_sheet.md b/docs/samplesheets/validate_sample_sheet.md index 9fef1eaf..ccede6c0 100644 --- a/docs/samplesheets/validate_sample_sheet.md +++ b/docs/samplesheets/validate_sample_sheet.md @@ -26,4 +26,8 @@ See an example in the `input` field from the [example schema.json](https://raw.g } ``` +!!! 
warning + + The `.fromSamplesheet` channel operator and `samplesheetToList` also validate the files before converting them. If you convert the samplesheet, you should not add a schema to the parameter corresponding to the samplesheet to keep your pipeline as efficient as possible. + For more information about the sample sheet JSON schema refer to [sample sheet docs](../nextflow_schema/nextflow_schema_specification.md). diff --git a/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json b/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json index aa527ed5..56f6a959 100644 --- a/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json +++ b/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json @@ -19,16 +19,8 @@ }, "fastq_2": { "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" }, "strandedness": { "type": "string", diff --git a/examples/fromSamplesheetBasic/pipeline/main.nf b/examples/fromSamplesheetBasic/pipeline/main.nf index a02f1ac8..924be3da 100644 --- a/examples/fromSamplesheetBasic/pipeline/main.nf +++ b/examples/fromSamplesheetBasic/pipeline/main.nf @@ -1,5 +1,5 @@ include { fromSamplesheet } from 'plugin/nf-schema' -ch_input = Channel.fromSamplesheet("input") +ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") ch_input.view() diff --git a/examples/fromSamplesheetBasic/pipeline/nextflow_schema.json b/examples/fromSamplesheetBasic/pipeline/nextflow_schema.json deleted file mode 100644 index 6096ceb9..00000000 --- a/examples/fromSamplesheetBasic/pipeline/nextflow_schema.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": 
"https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", - "title": "nf-core/testpipeline pipeline parameters", - "description": "this is a test", - "type": "object", - "defs": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "schema": "assets/schema_input.json", - "pattern": "^\\S+\\.(csv|tsv|yaml|json)$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - } - } - } - }, - "allOf": [ - { - "$ref": "#/defs/input_output_options" - } - ] -} diff --git a/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json b/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json index 7a931a25..ab42363a 100644 --- a/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json +++ b/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json @@ -20,16 +20,8 @@ }, "fastq_2": { "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" }, "strandedness": { "type": "string", diff --git a/examples/fromSamplesheetMeta/pipeline/main.nf b/examples/fromSamplesheetMeta/pipeline/main.nf index a02f1ac8..924be3da 100644 --- a/examples/fromSamplesheetMeta/pipeline/main.nf +++ b/examples/fromSamplesheetMeta/pipeline/main.nf @@ -1,5 +1,5 @@ include { fromSamplesheet } from 'plugin/nf-schema' -ch_input = Channel.fromSamplesheet("input") +ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") ch_input.view() diff --git a/examples/fromSamplesheetMeta/pipeline/nextflow_schema.json b/examples/fromSamplesheetMeta/pipeline/nextflow_schema.json deleted file mode 100644 index 6096ceb9..00000000 --- a/examples/fromSamplesheetMeta/pipeline/nextflow_schema.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", - "title": "nf-core/testpipeline pipeline parameters", - "description": "this is a test", - "type": "object", - "defs": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", 
- "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "schema": "assets/schema_input.json", - "pattern": "^\\S+\\.(csv|tsv|yaml|json)$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - } - } - } - }, - "allOf": [ - { - "$ref": "#/defs/input_output_options" - } - ] -} diff --git a/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json b/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json index a51e24f6..fbbd703e 100644 --- a/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json +++ b/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json @@ -19,16 +19,8 @@ }, "fastq_2": { "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" }, "fastq_1": { "type": "string", diff --git a/examples/fromSamplesheetOrder/pipeline/main.nf b/examples/fromSamplesheetOrder/pipeline/main.nf index a02f1ac8..924be3da 100644 --- a/examples/fromSamplesheetOrder/pipeline/main.nf +++ 
b/examples/fromSamplesheetOrder/pipeline/main.nf @@ -1,5 +1,5 @@ include { fromSamplesheet } from 'plugin/nf-schema' -ch_input = Channel.fromSamplesheet("input") +ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") ch_input.view() diff --git a/examples/fromSamplesheetOrder/pipeline/nextflow_schema.json b/examples/fromSamplesheetOrder/pipeline/nextflow_schema.json deleted file mode 100644 index 6096ceb9..00000000 --- a/examples/fromSamplesheetOrder/pipeline/nextflow_schema.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", - "title": "nf-core/testpipeline pipeline parameters", - "description": "this is a test", - "type": "object", - "defs": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "schema": "assets/schema_input.json", - "pattern": "^\\S+\\.(csv|tsv|yaml|json)$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - } - } - } - }, - "allOf": [ - { - "$ref": "#/defs/input_output_options" - } - ] -} From c84c55e979b892438671aceb558b0a15fbef47ad Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 11 Apr 2024 10:13:10 +0200 Subject: [PATCH 09/18] prettier --- docs/samplesheets/fromSamplesheet.md | 1 - docs/samplesheets/samplesheetToList.md | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/samplesheets/fromSamplesheet.md b/docs/samplesheets/fromSamplesheet.md index 9f21eb54..d99682cb 100644 --- a/docs/samplesheets/fromSamplesheet.md +++ b/docs/samplesheets/fromSamplesheet.md @@ -11,7 +11,6 @@ This channel operator validates and converts a sample sheet to ready-to-use chan The operator has one mandatory argument: the path of the JSON schema file corresponding to the samplesheet. This can be either a string with the relative path (from the root of the pipeline) or a file object of the schema. - ```groovy Channel.of("path/to/samplesheet").fromSamplesheet("path/to/json/schema") ``` diff --git a/docs/samplesheets/samplesheetToList.md b/docs/samplesheets/samplesheetToList.md index 933e1f62..9c6b1b59 100644 --- a/docs/samplesheets/samplesheetToList.md +++ b/docs/samplesheets/samplesheetToList.md @@ -10,8 +10,9 @@ description: Function to create a list from a sample sheet. This function validates and converts a sample sheet to a list in a similar way to the [`fromSamplesheet`](./fromSamplesheet.md) channel operator. This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)). The function has two mandatory arguments: + 1. The path to the samplesheet -2. The path to the JSON schema file corresponding to the samplesheet. +2. The path to the JSON schema file corresponding to the samplesheet. 
These can be either a string with the relative path (from the root of the pipeline) or a file object of the schema. From 773e3090b80e8649547ffc420763309e59353ec1 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 11 Apr 2024 10:23:32 +0200 Subject: [PATCH 10/18] fix tests --- .../test/nextflow/validation/SamplesheetConverterTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy b/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy index bed7b5d8..61dc7f8d 100644 --- a/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy +++ b/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy @@ -338,7 +338,7 @@ class SamplesheetConverterTest extends Dsl2Spec{ then: noExceptionThrown() - stdout.contains("Found the following unidentified headers in src/testResources/extraFields.csv:") + stdout.contains("Found the following unidentified headers in ${getRootString()}/src/testResources/extraFields.csv:" as String) stdout.contains("\t- extraField") stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, [], unique1, 1, itDoesExist]" as String) stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25, false, [], [], [], [], [], itDoesExist]") From 5bcd13ce3a6f8197bac47c5a7c4e0e8abb7c40c1 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> Date: Mon, 15 Apr 2024 12:50:43 +0200 Subject: [PATCH 11/18] Update CHANGELOG.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Júlia Mir Pedrol --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/CHANGELOG.md b/CHANGELOG.md index f09b0381..93e8b5b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ # Version 2.0.0 - Kagoshima -To migrate from nf-validation please follow the [migration guide](https://nextflow-io.github.io/nf-validation/latest/migration_guide/) +To migrate from nf-validation please follow the [migration guide](https://nextflow-io.github.io/nf-schema/latest/migration_guide/) ## New features From cfd0f604eaae587ad745fe4bdf6f5b38fda84b5b Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 15 Apr 2024 13:34:48 +0200 Subject: [PATCH 12/18] apply review suggestions --- docs/samplesheets/validate_sample_sheet.md | 4 ++-- .../SamplesheetConverterTest.groovy | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/samplesheets/validate_sample_sheet.md b/docs/samplesheets/validate_sample_sheet.md index ccede6c0..71f856fe 100644 --- a/docs/samplesheets/validate_sample_sheet.md +++ b/docs/samplesheets/validate_sample_sheet.md @@ -26,8 +26,8 @@ See an example in the `input` field from the [example schema.json](https://raw.g } ``` -!!! warning +!!! note - The `.fromSamplesheet` channel operator and `samplesheetToList` also validate the files before converting them. If you convert the samplesheet, you should not add a schema to the parameter corresponding to the samplesheet to keep your pipeline as efficient as possible. + The `.fromSamplesheet` channel operator and `samplesheetToList` function also validate the files before converting them. If you convert the samplesheet, it's not necessary to add a schema to the parameter corresponding to the samplesheet. For more information about the sample sheet JSON schema refer to [sample sheet docs](../nextflow_schema/nextflow_schema_specification.md). 
diff --git a/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy b/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy index 61dc7f8d..1bd268b0 100644 --- a/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy +++ b/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy @@ -486,4 +486,27 @@ class SamplesheetConverterTest extends Dsl2Spec{ stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) } + + def 'samplesheetToList - Path, Path' () { + given: + def SCRIPT_TEXT = ''' + include { samplesheetToList } from 'plugin/nf-schema' + + println(samplesheetToList(file("src/testResources/correct.csv", checkIfExists:true), file("src/testResources/schema_input.json", checkIfExists:true)).join("\\n")) + ''' + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.startsWith('[[') ? 
it : null } + + then: + noExceptionThrown() + stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) + stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]") + stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") + stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) + } } From c9711580bd77fe5ccc35436816017fe9e0a062b2 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 15 Apr 2024 17:39:32 +0200 Subject: [PATCH 13/18] fixed error with GString inputs --- .../src/main/nextflow/validation/SchemaValidator.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy index d3476efb..c0b6271e 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy @@ -204,7 +204,7 @@ class SchemaValidator extends PluginExtensionPoint { final target = CH.createBy(source) final next = { - if(!(it instanceof String || it instanceof Path)) { + if(!(it instanceof String || it instanceof Path || it instanceof GString)) { def msg = "${colors.red}The .fromSamplesheet operator only takes 
a channel with one value per entry (either a String or Path type)\n${colors.reset}\n" throw new SchemaValidationException(msg) } From f02b2400b0af65690caa7b2c4f28419a6318755c Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 16 Apr 2024 09:31:51 +0200 Subject: [PATCH 14/18] Use CharSequence instead of String and GString --- .../nextflow/validation/SchemaValidator.groovy | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy index c0b6271e..1e558f01 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy @@ -137,8 +137,8 @@ class SchemaValidator extends PluginExtensionPoint { @Function public List samplesheetToList( - final String samplesheet, - final String schema, + final CharSequence samplesheet, + final CharSequence schema, final Map options = null ) { def Path samplesheetFile = Nextflow.file(samplesheet) as Path @@ -148,17 +148,17 @@ class SchemaValidator extends PluginExtensionPoint { @Function public List samplesheetToList( final Path samplesheet, - final String schema, + final CharSequence schema, final Map options = null ) { - def String fullPathSchema = Utils.getSchemaPath(session.baseDir.toString(), schema) + def String fullPathSchema = Utils.getSchemaPath(session.baseDir.toString(), schema as String) def Path schemaFile = Nextflow.file(fullPathSchema) as Path return samplesheetToList(samplesheet, schemaFile, options) } @Function public List samplesheetToList( - final String samplesheet, + final CharSequence samplesheet, final Path schema, final Map options = null ) { @@ -179,10 +179,10 @@ class SchemaValidator extends PluginExtensionPoint { @Operator public DataflowWriteChannel fromSamplesheet( final DataflowReadChannel source, - final String schema, + final CharSequence 
schema, final Map options = null ) { - def String fullPathSchema = Utils.getSchemaPath(session.baseDir.toString(), schema) + def String fullPathSchema = Utils.getSchemaPath(session.baseDir.toString(), schema as String) def Path schemaFile = Nextflow.file(fullPathSchema) as Path return fromSamplesheet(source, schemaFile, options) } @@ -204,7 +204,7 @@ class SchemaValidator extends PluginExtensionPoint { final target = CH.createBy(source) final next = { - if(!(it instanceof String || it instanceof Path || it instanceof GString)) { + if(!(it instanceof CharSequence || it instanceof Path)) { def msg = "${colors.red}The .fromSamplesheet operator only takes a channel with one value per entry (either a String or Path type)\n${colors.reset}\n" throw new SchemaValidationException(msg) } From 321c245ab61c086aed2735a529a0015a4331963a Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 17 Apr 2024 14:37:47 +0200 Subject: [PATCH 15/18] add make install as shown in the podcast --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 4fc1c914..7fad7429 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ config ?= compileClasspath +version ?= $(shell grep 'Plugin-Version' plugins/nf-schema/src/resources/META-INF/MANIFEST.MF | awk '{ print $$2 }') ifdef module mm = :${module}: @@ -45,6 +46,10 @@ else endif +install: + ./gradlew copyPluginZip + rm -rf ${HOME}/.nextflow/plugins/nf-schema-${version} + cp -r build/plugins/nf-schema-${version} ${HOME}/.nextflow/plugins/nf-schema-${version} # # Upload JAR artifacts to Maven Central From 68117c0da71373898a03e834b91391f6a4290072 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 18 Apr 2024 11:31:09 +0200 Subject: [PATCH 16/18] remove fromSamplesheet and only use samplesheetToList --- CHANGELOG.md | 9 +- README.md | 6 +- docs/migration_guide.md | 8 +- .../sample_sheet_schema_specification.md | 8 +- docs/samplesheets/examples.md | 32 ++--- docs/samplesheets/fromSamplesheet.md | 
117 ----------------- docs/samplesheets/samplesheetToList.md | 124 +++++++++++++++++- docs/samplesheets/validate_sample_sheet.md | 2 +- .../fromSamplesheetBasic/pipeline/main.nf | 5 - examples/fromSamplesheetMeta/pipeline/main.nf | 5 - .../fromSamplesheetOrder/pipeline/main.nf | 5 - .../launch.sh | 0 .../log.txt | 0 .../pipeline/assets/schema_input.json | 0 .../samplesheetToListBasic/pipeline/main.nf | 5 + .../pipeline/nextflow.config | 0 .../samplesheet.csv | 0 .../launch.sh | 0 .../log.txt | 0 .../pipeline/assets/schema_input.json | 0 .../samplesheetToListMeta/pipeline/main.nf | 5 + .../pipeline/nextflow.config | 0 .../samplesheet.csv | 0 .../launch.sh | 0 .../log.txt | 0 .../pipeline/assets/schema_input.json | 0 .../samplesheetToListOrder/pipeline/main.nf | 5 + .../pipeline/nextflow.config | 0 .../samplesheet.csv | 0 .../validation/SchemaValidator.groovy | 50 ------- 30 files changed, 167 insertions(+), 219 deletions(-) delete mode 100644 docs/samplesheets/fromSamplesheet.md delete mode 100644 examples/fromSamplesheetBasic/pipeline/main.nf delete mode 100644 examples/fromSamplesheetMeta/pipeline/main.nf delete mode 100644 examples/fromSamplesheetOrder/pipeline/main.nf rename examples/{fromSamplesheetBasic => samplesheetToListBasic}/launch.sh (100%) rename examples/{fromSamplesheetBasic => samplesheetToListBasic}/log.txt (100%) rename examples/{fromSamplesheetBasic => samplesheetToListBasic}/pipeline/assets/schema_input.json (100%) create mode 100644 examples/samplesheetToListBasic/pipeline/main.nf rename examples/{fromSamplesheetBasic => samplesheetToListBasic}/pipeline/nextflow.config (100%) rename examples/{fromSamplesheetBasic => samplesheetToListBasic}/samplesheet.csv (100%) rename examples/{fromSamplesheetMeta => samplesheetToListMeta}/launch.sh (100%) rename examples/{fromSamplesheetMeta => samplesheetToListMeta}/log.txt (100%) rename examples/{fromSamplesheetMeta => samplesheetToListMeta}/pipeline/assets/schema_input.json (100%) create mode 100644 
examples/samplesheetToListMeta/pipeline/main.nf rename examples/{fromSamplesheetMeta => samplesheetToListMeta}/pipeline/nextflow.config (100%) rename examples/{fromSamplesheetMeta => samplesheetToListMeta}/samplesheet.csv (100%) rename examples/{fromSamplesheetOrder => samplesheetToListOrder}/launch.sh (100%) rename examples/{fromSamplesheetOrder => samplesheetToListOrder}/log.txt (100%) rename examples/{fromSamplesheetOrder => samplesheetToListOrder}/pipeline/assets/schema_input.json (100%) create mode 100644 examples/samplesheetToListOrder/pipeline/main.nf rename examples/{fromSamplesheetOrder => samplesheetToListOrder}/pipeline/nextflow.config (100%) rename examples/{fromSamplesheetOrder => samplesheetToListOrder}/samplesheet.csv (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93e8b5b2..a5857adc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,12 +12,11 @@ To migrate from nf-validation please follow the [migration guide](https://nextfl ## Changes - Changed the used draft for the schema from `draft-07` to `draft-2020-12`. See the [2019-09](https://json-schema.org/draft/2019-09/release-notes) and [2020-12](https://json-schema.org/draft/2020-12/release-notes) release notes for all changes ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) -- Removed all validation code from the `.fromSamplesheet()` channel factory. The validation is now solely done in the `validateParameters()` function. A custom error message will now be displayed if any error has been encountered during the conversion ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) +- Removed the `fromSamplesheet` channel operator and added a `samplesheetToList` function instead. This function validates the samplesheet and returns a list of it. [#3](https://github.com/nextflow-io/nf-schema/pull/3) - Removed the `unique` keyword from the samplesheet schema. 
You should now use [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) or `uniqueEntries` instead ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) -- Removed the `skip_duplicate_check` option from the `fromSamplesheet()` channel factory and the `--validationSkipDuplicateCheck` parameter. You should now use the `uniqueEntries` or [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) keywords in the schema instead ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) -- `.fromSamplesheet()` now is a channel operator instead of a channel factory. It takes one required argument which can either be a string containing the relative path to the schema or a file object of the schema [#3](https://github.com/nextflow-io/nf-schema/pull/3) -- `.fromSamplesheet()` now does dynamic typecasting instead of using the `type` fields in the JSON schema. This is done due to the complexity of `draft-2020-12` JSON schemas. This should not have that much impact but keep in mind that some types can be different between this and earlier versions because of this ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) -- `.fromSamplesheet()` will now set all missing values as `[]` instead of the type specific defaults (because of the changes in the previous point). This should not change that much as this will also result in `false` when used in conditions. ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) +- Removed the `skip_duplicate_check` option from the `samplesheetToList()` function and the `--validationSkipDuplicateCheck` parameter. 
You should now use the `uniqueEntries` or [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) keywords in the schema instead ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) +- `samplesheetToList()` now does dynamic typecasting instead of using the `type` fields in the JSON schema. This is done due to the complexity of `draft-2020-12` JSON schemas. This should not have that much impact but keep in mind that some types can be different between this version and older versions in nf-validation ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) +- `samplesheetToList()` will now set all missing values as `[]` instead of the type specific defaults (because of the changes in the previous point). This should not change that much as this will also result in `false` when used in conditions. ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) ## Improvements diff --git a/README.md b/README.md index 1f0f432d..b1871e8c 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ This is all that is needed - Nextflow will automatically fetch the plugin code a You can now include the plugin helper functions into your Nextflow pipeline: ```groovy title="main.nf" -include { validateParameters; paramsHelp; paramsSummaryLog; fromSamplesheet } from 'plugin/nf-schema' +include { validateParameters; paramsHelp; paramsSummaryLog; samplesheetToList } from 'plugin/nf-schema' // Print help message, supply typical command line usage for the pipeline if (params.help) { @@ -51,7 +51,7 @@ validateParameters() log.info paramsSummaryLog(workflow) // Create a new channel of metadata from a sample sheet passed to the pipeline through the --input parameter -ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") +ch_input = Channel.fromList(samplesheetToList(params.input, "assets/schema_input.json")) ``` ## Dependencies @@ -61,7 +61,7 @@ ch_input = 
Channel.of(params.input).fromSamplesheet("assets/schema_input.json") ## Slack channel -There is a dedicated [nf-validation Slack channel](https://nfcore.slack.com/archives/C056RQB10LU) in the [Nextflow Slack workspace](https://nextflow.slack.com). +There is a dedicated [nf-schema Slack channel](https://nfcore.slack.com/archives/C056RQB10LU) in the [Nextflow Slack workspace](https://nextflow.slack.com). ## Credits diff --git a/docs/migration_guide.md b/docs/migration_guide.md index 0db3f9f9..c5df2e8e 100644 --- a/docs/migration_guide.md +++ b/docs/migration_guide.md @@ -14,7 +14,7 @@ This guide is intended to help you migrate your pipeline from [nf-validation](ht Following list shows the major breaking changes introduced in nf-schema: 1. The JSON schema draft has been updated from `draft-07` to `draft-2020-12`. See [JSON Schema draft 2020-12 release notes](https://json-schema.org/draft/2020-12/release-notes) and [JSON schema draft 2019-09 release notes](https://json-schema.org/draft/2019-09/release-notes) for more information. -2. The `fromSamplesheet` channel factory has been converted to a channel operator. See [updating `fromSamplesheet`](#updating-fromsamplesheet) for more information. +2. The `fromSamplesheet` channel factory has been converted to a function called `samplesheetToList`. See [updating `fromSamplesheet`](#updating-fromsamplesheet) for more information. 3. The `unique` keyword for samplesheet schemas has been removed. Please use [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) or [`uniqueEntries`](nextflow_schema/nextflow_schema_specification.md#uniqueentries) now instead. 4. The `dependentRequired` keyword now works as it's supposed to work in JSON schema. 
See [`dependentRequired`](https://json-schema.org/understanding-json-schema/reference/conditionals#dependentRequired) for more information @@ -35,18 +35,20 @@ This will replace the old schema draft specification (`draft-07`) by the new one Repeat this command for every JSON schema you use in your pipeline. e.g. for the default samplesheet schema in nf-core pipelines: `bash sed -i -e 's/http:\/\/json-schema.org\/draft-07\/schema/https:\/\/json-schema.org\/draft\/2020-12\/schema/g' -e 's/definitions/defs/g' assets/schema_input.json ` -Next you should update the `.fromSamplesheet` channel factory to the channel operator. Following tabs shows the difference between the versions: +Next you should update the `.fromSamplesheet` channel factory to the `samplesheetToList` function. The following tabs show the difference between the versions: === "nf-validation" ```groovy + include { fromSamplesheet } from 'plugin/nf-validation' Channel.fromSamplesheet("input") ``` === "nf-schema" ```groovy - Channel.of(params.input).fromSamplesheet("path/to/samplesheet/schema") + include { samplesheetToList } from 'plugin/nf-schema' + Channel.fromList(samplesheetToList(params.input, "path/to/samplesheet/schema")) ``` !!! note diff --git a/docs/nextflow_schema/sample_sheet_schema_specification.md b/docs/nextflow_schema/sample_sheet_schema_specification.md index 3d2796c8..b27e6648 100644 --- a/docs/nextflow_schema/sample_sheet_schema_specification.md +++ b/docs/nextflow_schema/sample_sheet_schema_specification.md @@ -59,7 +59,7 @@ Fields that are present in the sample sheet, but not in the schema will be ignor !!! warning The order of properties in the _schema_ **is** important. - This order defines the order of output channel properties when using the `fromSamplesheet` channel factory. + This order defines the order of output channel properties when using the `samplesheetToList()` function.
## Common keys @@ -68,12 +68,6 @@ For example: `type`, `pattern`, `format`, `errorMessage`, `exists` and so on. Please refer to the [Nextflow schema specification](../nextflow_schema/nextflow_schema_specification.md) docs for details. -!!! tip - - Sample sheets are commonly used to define input file paths. - Be sure to set `"type": "string"`, `exists: true`, `"format": "file-path"` and `"schema":"path/to/samplesheet/schema.json"` for these properties, - so that samplesheets are correctly validated and `fromSamplesheet` does not result in any errors. - ## Sample sheet keys Below are the properties that are specific to sample sheet schema. diff --git a/docs/samplesheets/examples.md b/docs/samplesheets/examples.md index 88c7a1c5..bc00030b 100644 --- a/docs/samplesheets/examples.md +++ b/docs/samplesheets/examples.md @@ -7,7 +7,7 @@ description: Examples of advanced sample sheet creation techniques. ## Introduction -Understanding channel structure and manipulation is critical for getting the most out of Nextflow. nf-schema helps initialise your channels from the text inputs to get you started, but further work might be required to fit your exact use case. In this page we run through some common cases for transforming the output of `.fromSamplesheet()`. +Understanding channel structure and manipulation is critical for getting the most out of Nextflow. nf-schema helps initialise your channels from the text inputs to get you started, but further work might be required to fit your exact use case. In this page we run through some common cases for transforming the output of `samplesheetToList()`. ### Glossary @@ -17,7 +17,7 @@ Understanding channel structure and manipulation is critical for getting the mos ## Default mode -Each item in the channel emitted by `.fromSamplesheet()` is a tuple, corresponding with each row of the sample sheet. 
Each item will be composed of a meta value (if present) and any additional elements from columns in the sample sheet, e.g.: +Each item in the list emitted by `samplesheetToList()` is a tuple, corresponding with each row of the sample sheet. Each item will be composed of a meta value (if present) and any additional elements from columns in the sample sheet, e.g.: ```csv sample,fastq_1,fastq_2,bed @@ -25,7 +25,7 @@ sample1,fastq1.R1.fq.gz,fastq1.R2.fq.gz,sample1.bed sample2,fastq2.R1.fq.gz,fastq2.R2.fq.gz, ``` -Might create a channel where each element consists of 4 items, a map value followed by three files: +Might create a list where each element consists of 4 items, a map value followed by three files: ```groovy // Columns: @@ -36,13 +36,13 @@ Might create a channel where each element consists of 4 items, a map value follo [ [ id: "sample2" ], fastq2.R1.fq.gz, fastq2.R2.fq.gz, [] ] // A missing value from the sample sheet is an empty list ``` -This channel can be used as input of a process where the input declaration is: +This list can be converted to a channel that can be used as input of a process where the input declaration is: ```nextflow tuple val(meta), path(fastq_1), path(fastq_2), path(bed) ``` -It may be necessary to manipulate this channel to fit your process inputs. For more documentation, check out the [Nextflow operator docs](https://www.nextflow.io/docs/latest/operator.html), however here are some common use cases with `.fromSamplesheet()`. +It may be necessary to manipulate this channel to fit your process inputs. For more documentation, check out the [Nextflow operator docs](https://www.nextflow.io/docs/latest/operator.html), however here are some common use cases with `samplesheetToList()`. 
## Using a sample sheet with no headers @@ -73,7 +73,7 @@ or this YAML file: - test_2 ``` -The output of `.fromSamplesheet()` will look like this: +The output of `samplesheetToList()` will look like this: ```bash test_1 @@ -82,7 +82,7 @@ test_2 ## Changing the structure of channel items -Each item in the channel will be a tuple, but some processes will use multiple files as a list in their input channel, this is common in nf-core modules. For example, consider the following input declaration in a process, where FASTQ could be > 1 file: +Each item in the list will be a tuple, but some processes will use multiple files as a list in their input channel, this is common in nf-core modules. For example, consider the following input declaration in a process, where FASTQ could be > 1 file: ```groovy process ZCAT_FASTQS { @@ -95,7 +95,7 @@ process ZCAT_FASTQS { } ``` -The output of `.fromSamplesheet()` can be used by default with a process with the following input declaration: +The output of `samplesheetToList()` (converted to a channel) can be used by default with a process with the following input declaration: ```groovy val(meta), path(fastq_1), path(fastq_2) @@ -104,7 +104,7 @@ val(meta), path(fastq_1), path(fastq_2) To manipulate each item within a channel, you should use the [Nextflow `.map()` operator](https://www.nextflow.io/docs/latest/operator.html#map). This will apply a function to each element of the channel in turn. Here, we convert the flat tuple into a tuple composed of a meta and a list of FASTQ files: ```groovy -Channel.of(params.input).fromSamplesheet("path/to/json/schema") +Channel.fromList(samplesheetToList(params.input, "path/to/json/schema")) .map { meta, fastq_1, fastq_2 -> tuple(meta, [ fastq_1, fastq_2 ]) } .set { input } @@ -122,7 +122,7 @@ ZCAT_FASTQS(input) For example, to remove the BED file from the channel created above, we could not return it from the map. 
Note the absence of the `bed` item in the return of the closure below: ```groovy -Channel.of(params.input).fromSamplesheet("path/to/json/schema") +Channel.fromList(samplesheetToList(params.input, "path/to/json/schema")) .map { meta, fastq_1, fastq_2, bed -> tuple(meta, fastq_1, fastq_2) } .set { input } @@ -136,7 +136,7 @@ In this way you can drop items from a channel. We could perform this twice to create one channel containing the FASTQs and one containing the BED files, however Nextflow has a native operator to separate channels called [`.multiMap()`](https://www.nextflow.io/docs/latest/operator.html#multimap). Here, we separate the FASTQs and BEDs into two separate channels using `multiMap`. Note, the channels are both contained in `input` and accessed as an attribute using dot notation: ```groovy -Channel.of(params.input).fromSamplesheet("path/to/json/schema") +Channel.fromList(samplesheetToList(params.input, "path/to/json/schema")) .multiMap { meta, fastq_1, fastq_2, bed -> fastq: tuple(meta, fastq_1, fastq_2) bed: tuple(meta, bed) @@ -163,7 +163,7 @@ This example shows a channel which can have entries for WES or WGS data. WES dat // Channel with four elements - see docs for examples params.input = "samplesheet.csv" -Channel.of(params.input).fromSamplesheet("path/to/json/schema") +Channel.fromList(samplesheetToList(params.input, "path/to/json/schema")) .branch { meta, fastq_1, fastq_2, bed -> // If BED does not exist WGS: !bed @@ -178,13 +178,13 @@ input.WGS.view() // Channel has 3 elements: meta, fastq_1, fastq_2 input.WES.view() // Channel has 4 elements: meta, fastq_1, fastq_2, bed ``` -Unlike `multiMap`, the outputs of `.branch()`, the resulting channels will contain a different number of items. +Unlike `.multiMap()`, the outputs of `.branch()` will contain a different number of items. ## Combining a channel After splitting the channel, it may be necessary to rejoin the channel. 
There are many ways to join a channel, but here we will demonstrate the simplest which uses the [Nextflow join operator](https://www.nextflow.io/docs/latest/operator.html#join) to rejoin any of the channels from above based on the first element in each item, the `meta` value. -```nextflow +```groovy input.fastq.view() // Channel has 3 elements: meta, fastq_1, fastq_2 input.bed.view() // Channel has 2 elements: meta, bed @@ -204,14 +204,14 @@ It's useful to determine the count of channel entries with similar values when y This example contains a channel where multiple samples can be in the same family. Later on in the pipeline we want to merge the analyzed files so one file gets created for each family. The result will be a channel with an extra meta field containing the count of channel entries with the same family name. ```groovy -// channel created by fromSamplesheet() previous to modification: +// channel created with samplesheetToList() previous to modification: // [[id:example1, family:family1], example1.txt] // [[id:example2, family:family1], example2.txt] // [[id:example3, family:family2], example3.txt] params.input = "sample sheet.csv" -Channel.of(params.input).fromSamplesheet("path/to/json/schema") +Channel.fromList(samplesheetToList(params.input, "path/to/json/schema")) .tap { ch_raw } // Create a copy of the original channel .map { meta, txt -> [ meta.family ] } // Isolate the value to count on .reduce([:]) { counts, family -> // Creates a map like this: [family1:2, family2:1] diff --git a/docs/samplesheets/fromSamplesheet.md b/docs/samplesheets/fromSamplesheet.md deleted file mode 100644 index d99682cb..00000000 --- a/docs/samplesheets/fromSamplesheet.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Create a channel -description: Channel operator to create a channel from a sample sheet. ---- - -# Create a channel from a sample sheet - -## `fromSamplesheet` - -This channel operator validates and converts a sample sheet to ready-to-use channel entries. 
This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)). - -The operator has one mandatory argument: the path of the JSON schema file corresponding to the samplesheet. This can be either a string with the relative path (from the root of the pipeline) or a file object of the schema. - -```groovy -Channel.of("path/to/samplesheet").fromSamplesheet("path/to/json/schema") -``` - -## Basic example - -In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/fromSamplesheetBasic), we create a simple channel from a CSV sample sheet. - -``` ---8<-- "examples/fromSamplesheetBasic/log.txt" -``` - -=== "main.nf" - - ```groovy - --8<-- "examples/fromSamplesheetBasic/pipeline/main.nf" - ``` - -=== "samplesheet.csv" - - ```csv - --8<-- "examples/fromSamplesheetBasic/samplesheet.csv" - ``` - -=== "nextflow.config" - - ```groovy - --8<-- "examples/fromSamplesheetBasic/pipeline/nextflow.config" - ``` - -=== "assets/schema_input.json" - - ```json - --8<-- "examples/fromSamplesheetBasic/pipeline/assets/schema_input.json" - ``` - -## Order of fields - -[This example](https://github.com/nextflow-io/nf-schema/tree/master/examples/fromSamplesheetOrder) demonstrates that the order of columns in the sample sheet file has no effect. - -!!! danger - - It is the order of fields **in the sample sheet JSON schema** which defines the order of items in the channel returned by `fromSamplesheet()`, _not_ the order of fields in the sample sheet file. 
- -``` ---8<-- "examples/fromSamplesheetOrder/log.txt" -``` - -=== "samplesheet.csv" - - ```csv - --8<-- "examples/fromSamplesheetOrder/samplesheet.csv" - ``` - -=== "assets/schema_input.json" - - ```json hl_lines="10 15 20 25" - --8<-- "examples/fromSamplesheetOrder/pipeline/assets/schema_input.json" - ``` - -=== "main.nf" - - ```groovy - --8<-- "examples/fromSamplesheetOrder/pipeline/main.nf" - ``` - -=== "nextflow.config" - - ```groovy - --8<-- "examples/fromSamplesheetOrder/pipeline/nextflow.config" - ``` - -## Channel with meta map - -In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/fromSamplesheetMeta), we use the schema to mark two columns as meta fields. -This returns a channel with a meta map. - -``` ---8<-- "examples/fromSamplesheetMeta/log.txt" -``` - -=== "assets/schema_input.json" - - ```json hl_lines="14 30" - --8<-- "examples/fromSamplesheetMeta/pipeline/assets/schema_input.json" - ``` - -=== "main.nf" - - ```groovy - --8<-- "examples/fromSamplesheetMeta/pipeline/main.nf" - ``` - -=== "samplesheet.csv" - - ```csv - --8<-- "examples/fromSamplesheetMeta/samplesheet.csv" - ``` - -=== "nextflow.config" - - ```groovy - --8<-- "examples/fromSamplesheetMeta/pipeline/nextflow.config" - ``` diff --git a/docs/samplesheets/samplesheetToList.md b/docs/samplesheets/samplesheetToList.md index 9c6b1b59..a1925fdf 100644 --- a/docs/samplesheets/samplesheetToList.md +++ b/docs/samplesheets/samplesheetToList.md @@ -7,7 +7,7 @@ description: Function to create a list from a sample sheet. ## `samplesheetToList` -This function validates and converts a sample sheet to a list in a similar way to the [`fromSamplesheet`](./fromSamplesheet.md) channel operator. This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)). +This function validates and converts a sample sheet to a list. 
This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)). The function has two mandatory arguments: @@ -20,6 +20,126 @@ These can be either a string with the relative path (from the root of the pipeli samplesheetToList("path/to/samplesheet", "path/to/json/schema") ``` +This function can be used together with existing channel factories/operators to create one channel entry per samplesheet entry. + +### Use as a channel factory + +The function can be given to the `.fromList` channel factory to mimic the functionality of a channel factory: + +```groovy +Channel.fromList(samplesheetToList("path/to/samplesheet", "path/to/json/schema")) +``` + !!! note - This function works very similar to the `fromSamplesheet` channel operator. See the [`fromSamplesheet` examples](./fromSamplesheet.md#basic-example) for some examples on how to use this function. + This will mimic the `fromSamplesheet` channel factory as it was in [nf-validation](https://github.com/nextflow-io/nf-validation). + +### Use as a channel operator + +The function can be used with the `.flatMap` channel operator to create a channel from samplesheets that are already in a channel: + +```groovy +Channel.of("path/to/samplesheet").flatMap { samplesheetToList(it, "path/to/json/schema") } +``` + +## Basic example + +In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/samplesheetToListBasic), we create a simple channel from a CSV sample sheet.
+ +``` +--8<-- "examples/samplesheetToListBasic/log.txt" +``` + +=== "main.nf" + + ```groovy + --8<-- "examples/samplesheetToListBasic/pipeline/main.nf" + ``` + +=== "samplesheet.csv" + + ```csv + --8<-- "examples/samplesheetToListBasic/samplesheet.csv" + ``` + +=== "nextflow.config" + + ```groovy + --8<-- "examples/samplesheetToListBasic/pipeline/nextflow.config" + ``` + +=== "assets/schema_input.json" + + ```json + --8<-- "examples/samplesheetToListBasic/pipeline/assets/schema_input.json" + ``` + +## Order of fields + +[This example](https://github.com/nextflow-io/nf-schema/tree/master/examples/samplesheetToListOrder) demonstrates that the order of columns in the sample sheet file has no effect. + +!!! danger + + It is the order of fields **in the sample sheet JSON schema** which defines the order of items in the list returned by `samplesheetToList()`, _not_ the order of fields in the sample sheet file. + +``` +--8<-- "examples/samplesheetToListOrder/log.txt" +``` + +=== "samplesheet.csv" + + ```csv + --8<-- "examples/samplesheetToListOrder/samplesheet.csv" + ``` + +=== "assets/schema_input.json" + + ```json hl_lines="10 15 20 25" + --8<-- "examples/samplesheetToListOrder/pipeline/assets/schema_input.json" + ``` + +=== "main.nf" + + ```groovy + --8<-- "examples/samplesheetToListOrder/pipeline/main.nf" + ``` + +=== "nextflow.config" + + ```groovy + --8<-- "examples/samplesheetToListOrder/pipeline/nextflow.config" + ``` + +## Channel with meta map + +In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/samplesheetToListMeta), we use the schema to mark two columns as meta fields. +This returns a channel with a meta map.
+ +``` +--8<-- "examples/samplesheetToListMeta/log.txt" +``` + +=== "assets/schema_input.json" + + ```json hl_lines="14 30" + --8<-- "examples/samplesheetToListMeta/pipeline/assets/schema_input.json" + ``` + +=== "main.nf" + + ```groovy + --8<-- "examples/samplesheetToListMeta/pipeline/main.nf" + ``` + +=== "samplesheet.csv" + + ```csv + --8<-- "examples/samplesheetToListMeta/samplesheet.csv" + ``` + +=== "nextflow.config" + + ```groovy + --8<-- "examples/samplesheetToListMeta/pipeline/nextflow.config" + ``` + diff --git a/docs/samplesheets/validate_sample_sheet.md b/docs/samplesheets/validate_sample_sheet.md index 71f856fe..f9c608e2 100644 --- a/docs/samplesheets/validate_sample_sheet.md +++ b/docs/samplesheets/validate_sample_sheet.md @@ -28,6 +28,6 @@ See an example in the `input` field from the [example schema.json](https://raw.g !!! note - The `.fromSamplesheet` channel operator and `samplesheetToList` function also validate the files before converting them. If you convert the samplesheet, it's not necessary to add a schema to the parameter corresponding to the samplesheet. + The `samplesheetToList` function also validates the files before converting them. If you convert the samplesheet, it's not necessary to add a schema to the parameter corresponding to the samplesheet. For more information about the sample sheet JSON schema refer to [sample sheet docs](../nextflow_schema/nextflow_schema_specification.md).
diff --git a/examples/fromSamplesheetBasic/pipeline/main.nf b/examples/fromSamplesheetBasic/pipeline/main.nf deleted file mode 100644 index 924be3da..00000000 --- a/examples/fromSamplesheetBasic/pipeline/main.nf +++ /dev/null @@ -1,5 +0,0 @@ -include { fromSamplesheet } from 'plugin/nf-schema' - -ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") - -ch_input.view() diff --git a/examples/fromSamplesheetMeta/pipeline/main.nf b/examples/fromSamplesheetMeta/pipeline/main.nf deleted file mode 100644 index 924be3da..00000000 --- a/examples/fromSamplesheetMeta/pipeline/main.nf +++ /dev/null @@ -1,5 +0,0 @@ -include { fromSamplesheet } from 'plugin/nf-schema' - -ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") - -ch_input.view() diff --git a/examples/fromSamplesheetOrder/pipeline/main.nf b/examples/fromSamplesheetOrder/pipeline/main.nf deleted file mode 100644 index 924be3da..00000000 --- a/examples/fromSamplesheetOrder/pipeline/main.nf +++ /dev/null @@ -1,5 +0,0 @@ -include { fromSamplesheet } from 'plugin/nf-schema' - -ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") - -ch_input.view() diff --git a/examples/fromSamplesheetBasic/launch.sh b/examples/samplesheetToListBasic/launch.sh similarity index 100% rename from examples/fromSamplesheetBasic/launch.sh rename to examples/samplesheetToListBasic/launch.sh diff --git a/examples/fromSamplesheetBasic/log.txt b/examples/samplesheetToListBasic/log.txt similarity index 100% rename from examples/fromSamplesheetBasic/log.txt rename to examples/samplesheetToListBasic/log.txt diff --git a/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json b/examples/samplesheetToListBasic/pipeline/assets/schema_input.json similarity index 100% rename from examples/fromSamplesheetBasic/pipeline/assets/schema_input.json rename to examples/samplesheetToListBasic/pipeline/assets/schema_input.json diff --git 
a/examples/samplesheetToListBasic/pipeline/main.nf b/examples/samplesheetToListBasic/pipeline/main.nf new file mode 100644 index 00000000..ea045517 --- /dev/null +++ b/examples/samplesheetToListBasic/pipeline/main.nf @@ -0,0 +1,5 @@ +include { samplesheetToList } from 'plugin/nf-schema' + +ch_input = Channel.fromList(samplesheetToList(params.input, "assets/schema_input.json")) + +ch_input.view() diff --git a/examples/fromSamplesheetBasic/pipeline/nextflow.config b/examples/samplesheetToListBasic/pipeline/nextflow.config similarity index 100% rename from examples/fromSamplesheetBasic/pipeline/nextflow.config rename to examples/samplesheetToListBasic/pipeline/nextflow.config diff --git a/examples/fromSamplesheetBasic/samplesheet.csv b/examples/samplesheetToListBasic/samplesheet.csv similarity index 100% rename from examples/fromSamplesheetBasic/samplesheet.csv rename to examples/samplesheetToListBasic/samplesheet.csv diff --git a/examples/fromSamplesheetMeta/launch.sh b/examples/samplesheetToListMeta/launch.sh similarity index 100% rename from examples/fromSamplesheetMeta/launch.sh rename to examples/samplesheetToListMeta/launch.sh diff --git a/examples/fromSamplesheetMeta/log.txt b/examples/samplesheetToListMeta/log.txt similarity index 100% rename from examples/fromSamplesheetMeta/log.txt rename to examples/samplesheetToListMeta/log.txt diff --git a/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json b/examples/samplesheetToListMeta/pipeline/assets/schema_input.json similarity index 100% rename from examples/fromSamplesheetMeta/pipeline/assets/schema_input.json rename to examples/samplesheetToListMeta/pipeline/assets/schema_input.json diff --git a/examples/samplesheetToListMeta/pipeline/main.nf b/examples/samplesheetToListMeta/pipeline/main.nf new file mode 100644 index 00000000..ea045517 --- /dev/null +++ b/examples/samplesheetToListMeta/pipeline/main.nf @@ -0,0 +1,5 @@ +include { samplesheetToList } from 'plugin/nf-schema' + +ch_input = 
Channel.fromList(samplesheetToList(params.input, "assets/schema_input.json")) + +ch_input.view() diff --git a/examples/fromSamplesheetMeta/pipeline/nextflow.config b/examples/samplesheetToListMeta/pipeline/nextflow.config similarity index 100% rename from examples/fromSamplesheetMeta/pipeline/nextflow.config rename to examples/samplesheetToListMeta/pipeline/nextflow.config diff --git a/examples/fromSamplesheetMeta/samplesheet.csv b/examples/samplesheetToListMeta/samplesheet.csv similarity index 100% rename from examples/fromSamplesheetMeta/samplesheet.csv rename to examples/samplesheetToListMeta/samplesheet.csv diff --git a/examples/fromSamplesheetOrder/launch.sh b/examples/samplesheetToListOrder/launch.sh similarity index 100% rename from examples/fromSamplesheetOrder/launch.sh rename to examples/samplesheetToListOrder/launch.sh diff --git a/examples/fromSamplesheetOrder/log.txt b/examples/samplesheetToListOrder/log.txt similarity index 100% rename from examples/fromSamplesheetOrder/log.txt rename to examples/samplesheetToListOrder/log.txt diff --git a/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json b/examples/samplesheetToListOrder/pipeline/assets/schema_input.json similarity index 100% rename from examples/fromSamplesheetOrder/pipeline/assets/schema_input.json rename to examples/samplesheetToListOrder/pipeline/assets/schema_input.json diff --git a/examples/samplesheetToListOrder/pipeline/main.nf b/examples/samplesheetToListOrder/pipeline/main.nf new file mode 100644 index 00000000..ea045517 --- /dev/null +++ b/examples/samplesheetToListOrder/pipeline/main.nf @@ -0,0 +1,5 @@ +include { samplesheetToList } from 'plugin/nf-schema' + +ch_input = Channel.fromList(samplesheetToList(params.input, "assets/schema_input.json")) + +ch_input.view() diff --git a/examples/fromSamplesheetOrder/pipeline/nextflow.config b/examples/samplesheetToListOrder/pipeline/nextflow.config similarity index 100% rename from 
examples/fromSamplesheetOrder/pipeline/nextflow.config rename to examples/samplesheetToListOrder/pipeline/nextflow.config diff --git a/examples/fromSamplesheetOrder/samplesheet.csv b/examples/samplesheetToListOrder/samplesheet.csv similarity index 100% rename from examples/fromSamplesheetOrder/samplesheet.csv rename to examples/samplesheetToListOrder/samplesheet.csv diff --git a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy index 1e558f01..f8fbac12 100644 --- a/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-schema/src/main/nextflow/validation/SchemaValidator.groovy @@ -176,56 +176,6 @@ class SchemaValidator extends PluginExtensionPoint { return converter.validateAndConvertToList() } - @Operator - public DataflowWriteChannel fromSamplesheet( - final DataflowReadChannel source, - final CharSequence schema, - final Map options = null - ) { - def String fullPathSchema = Utils.getSchemaPath(session.baseDir.toString(), schema as String) - def Path schemaFile = Nextflow.file(fullPathSchema) as Path - return fromSamplesheet(source, schemaFile, options) - } - - @Operator - public DataflowWriteChannel fromSamplesheet( - final DataflowReadChannel source, - final Path schema, - final Map options = null - ) { - // Logging - def params = session.params - def Boolean useMonochromeLogs = options?.containsKey('monochrome_logs') ? options.monochrome_logs as Boolean : - params.monochrome_logs ? params.monochrome_logs as Boolean : - params.monochromeLogs ? 
params.monochromeLogs as Boolean : - false - - def colors = Utils.logColours(useMonochromeLogs) - - final target = CH.createBy(source) - final next = { - if(!(it instanceof CharSequence || it instanceof Path)) { - def msg = "${colors.red}The .fromSamplesheet operator only takes a channel with one value per entry (either a String or Path type)\n${colors.reset}\n" - throw new SchemaValidationException(msg) - } - def Path samplesheet = it as Path - if(it instanceof String) { - samplesheet = Nextflow.file(it) as Path - } - def SamplesheetConverter converter = new SamplesheetConverter(samplesheet, schema, params, options) - def List arrayChannel = converter.validateAndConvertToList() - arrayChannel.each { - target.bind(it) - } - } - final done = { - target.bind(Channel.STOP) - } - DataflowHelper.subscribeImpl(source, [onNext: next, onComplete: done]) - return target - } - - // // Initialise expected params if not present // From 131848bfece45ea872401ae477967af5da779f43 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 18 Apr 2024 11:40:53 +0200 Subject: [PATCH 17/18] fix tests --- docs/samplesheets/samplesheetToList.md | 1 - .../SamplesheetConverterTest.groovy | 112 +++++++++++------- 2 files changed, 70 insertions(+), 43 deletions(-) diff --git a/docs/samplesheets/samplesheetToList.md b/docs/samplesheets/samplesheetToList.md index a1925fdf..31715ed9 100644 --- a/docs/samplesheets/samplesheetToList.md +++ b/docs/samplesheets/samplesheetToList.md @@ -142,4 +142,3 @@ This returns a channel with a meta map. 
```groovy --8<-- "examples/samplesheetToListMeta/pipeline/nextflow.config" ``` - diff --git a/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy b/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy index 1bd268b0..2be61836 100644 --- a/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy +++ b/plugins/nf-schema/src/test/nextflow/validation/SamplesheetConverterTest.groovy @@ -59,11 +59,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'should work fine - CSV' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/correct.csv" + params.schema = "src/testResources/schema_input.json" workflow { - Channel.of("src/testResources/correct.csv") - .fromSamplesheet("src/testResources/schema_input.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -86,11 +88,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'should work fine - quoted CSV' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/correct_quoted.csv" + params.schema = "src/testResources/schema_input.json" workflow { - Channel.fromPath('src/testResources/correct_quoted.csv') - .fromSamplesheet("src/testResources/schema_input.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -113,11 +117,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'should work fine - TSV' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/correct.tsv" + params.schema = "src/testResources/schema_input.json" workflow { - Channel.of('src/testResources/correct.tsv') - 
.fromSamplesheet(file("src/testResources/schema_input.json", checkIfExists:true)) + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -140,11 +146,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'should work fine - YAML' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/correct.yaml" + params.schema = "src/testResources/schema_input.json" workflow { - Channel.of('src/testResources/correct.yaml') - .fromSamplesheet("src/testResources/schema_input.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -167,11 +175,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'should work fine - JSON' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/correct.json" + params.schema = "src/testResources/schema_input.json" workflow { - Channel.of('src/testResources/correct.json') - .fromSamplesheet("src/testResources/schema_input.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -194,11 +204,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'arrays should work fine - YAML' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/correct_arrays.yaml" + params.schema = "src/testResources/schema_input_with_arrays.json" workflow { - Channel.of('src/testResources/correct_arrays.yaml') - .fromSamplesheet("src/testResources/schema_input_with_arrays.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -220,11 +232,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'arrays should work fine - JSON' () { given: def SCRIPT_TEXT = ''' 
- include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/correct_arrays.json" + params.schema = "src/testResources/schema_input_with_arrays.json" workflow { - Channel.of('src/testResources/correct_arrays.json') - .fromSamplesheet("src/testResources/schema_input_with_arrays.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -246,11 +260,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'no header - CSV' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/no_header.csv" + params.schema = "src/testResources/no_header_schema.json" workflow { - Channel.of('src/testResources/no_header.csv') - .fromSamplesheet("src/testResources/no_header_schema.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -270,11 +286,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'no header - YAML' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/no_header.yaml" + params.schema = "src/testResources/no_header_schema.json" workflow { - Channel.of('src/testResources/no_header.yaml') - .fromSamplesheet("src/testResources/no_header_schema.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -294,11 +312,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'no header - JSON' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/no_header.json" + params.schema = "src/testResources/no_header_schema.json" workflow { - Channel.of('src/testResources/no_header.json') - 
.fromSamplesheet("src/testResources/no_header_schema.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -318,11 +338,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'extra field' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/extraFields.csv" + params.schema = "src/testResources/schema_input.json" workflow { - Channel.of('src/testResources/extraFields.csv') - .fromSamplesheet("src/testResources/schema_input.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -349,11 +371,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'no meta' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/no_meta.csv" + params.schema = "src/testResources/no_meta_schema.json" workflow { - Channel.of('src/testResources/no_meta.csv') - .fromSamplesheet("src/testResources/no_meta_schema.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -373,11 +397,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'deeply nested samplesheet - YAML' () { given: def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/deeply_nested.yaml" + params.schema = "src/testResources/samplesheet_schema_deeply_nested.json" workflow { - Channel.of('src/testResources/deeply_nested.yaml') - .fromSamplesheet("src/testResources/samplesheet_schema_deeply_nested.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' @@ -397,11 +423,13 @@ class SamplesheetConverterTest extends Dsl2Spec{ def 'deeply nested samplesheet - JSON' () { given: def SCRIPT_TEXT = ''' - include { 
fromSamplesheet } from 'plugin/nf-schema' + include { samplesheetToList } from 'plugin/nf-schema' + + params.input = "src/testResources/deeply_nested.json" + params.schema = "src/testResources/samplesheet_schema_deeply_nested.json" workflow { - Channel.of('src/testResources/deeply_nested.json') - .fromSamplesheet("src/testResources/samplesheet_schema_deeply_nested.json") + Channel.fromList(samplesheetToList(params.input, params.schema)) .view() } ''' From 47514c04fbd72109cf7402928417a3736875caa2 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 18 Apr 2024 12:46:03 +0200 Subject: [PATCH 18/18] add a small note about the automatic typing in csv or tsv --- docs/samplesheets/samplesheetToList.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/samplesheets/samplesheetToList.md b/docs/samplesheets/samplesheetToList.md index 31715ed9..bd54cb9d 100644 --- a/docs/samplesheets/samplesheetToList.md +++ b/docs/samplesheets/samplesheetToList.md @@ -20,6 +20,10 @@ These can be either a string with the relative path (from the root of the pipeli samplesheetToList("path/to/samplesheet", "path/to/json/schema") ``` +!!! note + + All data points in the CSV and TSV samplesheets will be converted to their derived type (e.g. `"true"` will be converted to the Boolean `true` and `"2"` will be converted to the Integer `2`). If this is not the expected behaviour, you can still convert these values back to a String with `.map { val -> val.toString() }`. + This function can be used together with existing channel factories/operators to create one channel entry per samplesheet entry. ### Use as a channel factory