From 25f0b9c348865ed15facfbd9a688a8a7c7fb8525 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 11 Apr 2024 10:12:15 +0200 Subject: [PATCH] update docs with the changes --- CHANGELOG.md | 136 +----------------- README.md | 7 +- docs/background.md | 2 + docs/migration_guide.md | 43 ++++-- docs/nextflow_schema/create_schema.md | 2 + docs/samplesheets/examples.md | 12 +- docs/samplesheets/fromSamplesheet.md | 47 +----- docs/samplesheets/samplesheetToList.md | 24 ++++ docs/samplesheets/validate_sample_sheet.md | 4 + .../pipeline/assets/schema_input.json | 12 +- .../fromSamplesheetBasic/pipeline/main.nf | 2 +- .../pipeline/nextflow_schema.json | 39 ----- .../pipeline/assets/schema_input.json | 12 +- examples/fromSamplesheetMeta/pipeline/main.nf | 2 +- .../pipeline/nextflow_schema.json | 39 ----- .../pipeline/assets/schema_input.json | 12 +- .../fromSamplesheetOrder/pipeline/main.nf | 2 +- .../pipeline/nextflow_schema.json | 39 ----- 18 files changed, 90 insertions(+), 346 deletions(-) create mode 100644 docs/samplesheets/samplesheetToList.md delete mode 100644 examples/fromSamplesheetBasic/pipeline/nextflow_schema.json delete mode 100644 examples/fromSamplesheetMeta/pipeline/nextflow_schema.json delete mode 100644 examples/fromSamplesheetOrder/pipeline/nextflow_schema.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 520d2732..f09b0381 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,12 @@ # Version 2.0.0 - Kagoshima -:warning: This version contains a number of breaking changes. Please read the changelog carefully before upgrading. :warning: - -To migrate your schemas please follow the [migration guide](https://nextflow-io.github.io/nf-validation/latest/migration_guide/) +To migrate from nf-validation please follow the [migration guide](https://nextflow-io.github.io/nf-validation/latest/migration_guide/) ## New features - Added the `uniqueEntries` keyword. 
This keyword takes a list of strings corresponding to names of fields that need to be a unique combination. e.g. `uniqueEntries: ['sample', 'replicate']` will make sure that the combination of the `sample` and `replicate` fields is unique. ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) +- Added `samplesheetToList` which is the function equivalent of `.fromSamplesheet` [#3](https://github.com/nextflow-io/nf-schema/pull/3) ## Changes @@ -16,6 +15,7 @@ To migrate your schemas please follow the [migration guide](https://nextflow-io. - Removed all validation code from the `.fromSamplesheet()` channel factory. The validation is now solely done in the `validateParameters()` function. A custom error message will now be displayed if any error has been encountered during the conversion ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - Removed the `unique` keyword from the samplesheet schema. You should now use [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) or `uniqueEntries` instead ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - Removed the `skip_duplicate_check` option from the `fromSamplesheet()` channel factory and the `--validationSkipDuplicateCheck` parameter. You should now use the `uniqueEntries` or [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) keywords in the schema instead ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) +- `.fromSamplesheet()` now is a channel operator instead of a channel factory. It takes one required argument which can either be a string containing the relative path to the schema or a file object of the schema [#3](https://github.com/nextflow-io/nf-schema/pull/3) - `.fromSamplesheet()` now does dynamic typecasting instead of using the `type` fields in the JSON schema. This is done due to the complexity of `draft-2020-12` JSON schemas. 
This should not have that much impact but keep in mind that some types can be different between this and earlier versions because of this ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - `.fromSamplesheet()` will now set all missing values as `[]` instead of the type specific defaults (because of the changes in the previous point). This should not change that much as this will also result in `false` when used in conditions. ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) @@ -25,133 +25,3 @@ To migrate your schemas please follow the [migration guide](https://nextflow-io. - The `schema` keyword will now work in all schemas. ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - Improved the error messages ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - `.fromSamplesheet()` now supports deeply nested samplesheets ([#141](https://github.com/nextflow-io/nf-validation/pull/141)) - -# Version 1.1.3 - Asahikawa - -## Improvements - -- Added support for double quotes (`"`) in CSV and TSV samplesheets ([#134](https://github.com/nextflow-io/nf-validation/pull/134)) - -# Version 1.1.2 - Wakayama - -## Bug fixes - -- Fixed an issue with inputs using `file-path-pattern` where only one file was found (`Path` casting to `ArrayList` error) ([#132](https://github.com/nextflow-io/nf-validation/pull/132)) - -# Version 1.1.1 - Shoyu - -## Bug fixes - -- Fixed an issue where samplesheet with a lot of null values would take forever to validate ([#120](https://github.com/nextflow-io/nf-validation/pull/120)) => Thanks @awgymer for fixing this! 
-- Now YAML files are actually validated instead of skipped ([#124](https://github.com/nextflow-io/nf-validation/pull/120)) - -# Version 1.1.0 - Miso - -## Features - -- Add support for samplesheets with no header ([#115](https://github.com/nextflow-io/nf-validation/pull/115)) - -## Bug fixes - -- Floats and doubles should now be created when using the `number` type in the schema ([#113](https://github.com/nextflow-io/nf-validation/pull/113/)) -- When `0` is used as a default value in the schema, a `0` will now be used as the value in the `.fromSamplesheet()` channel instead of `null` ([#114](https://github.com/nextflow-io/nf-validation/pull/114)) - -## New features - -- Added `file-path-pattern` format to check every file fetched using a glob pattern. Using a glob is now also possible in the samplesheet and will create a list of all files found using that glob pattern. ([#118](https://github.com/nextflow-io/nf-validation/pull/118)) - -# Version 1.0.0 - Tonkotsu - -The nf-validation plugin is now in production use across many pipelines and has (we hope) now reached a point of relative stability. The bump to major version v1.0.0 signifies that it is suitable for use in production pipelines. - -This version also introduces a small breaking change of syntax when providing optional arguments to the functions. You can now provide optional arguments such as the nextflow parameters schema path as: -`validateParameters(parameters_schema: 'my_file.json')` - -(previous syntax used positional arguments instead). - -## Bug fixes - -- The path to a custom parameters schema must be provided through a map '`parameters_schema: 'my_file.json'`' in `validateParameters()` and `paramsSummaryMap()` ([#108](https://github.com/nextflow-io/nf-validation/pull/108)) - -# Version 0.3.4 - -This version introduced a bug which made all pipeline runs using the function `validateParameters()` without providing any arguments fail. 
- -This bug causes Nextflow to exit with an error on launch for most pipelines. It should not be used. It was [removed](https://github.com/nextflow-io/plugins/pull/40) from the Nextflow Plugin registry to avoid breaking people's runs. - -### Bug fixes - -- Do not check S3 URL paths with `PathValidator` `FilePathValidator` and `DirectoryPathValidator` ([#106](https://github.com/nextflow-io/nf-validation/pull/106)) -- Make monochrome_logs an option in `paramsSummaryLog()`, `paramsSummaryMap()` and `paramsHelp()` instead of a global parameter ([#101](https://github.com/nextflow-io/nf-validation/pull/101)) - -# Version 0.3.3 - -### Bug fixes - -- Do not check if S3 URL paths exists to avoid AWS errors, and add a new parameter `validationS3PathCheck` ([#104](https://github.com/nextflow-io/nf-validation/pull/104)) - -# Version 0.3.2 - -### Bug fixes - -- Add parameters defined on the top level of the schema and within the definitions section as expected params ([#79](https://github.com/nextflow-io/nf-validation/pull/79)) -- Fix error when a parameter is not present in the schema and evaluates to false ([#89](https://github.com/nextflow-io/nf-validation/pull/89)) -- Changed the `schema_filename` option of `fromSamplesheet` to `parameters_schema` to make this option more clear to the user ([#91](https://github.com/nextflow-io/nf-validation/pull/91)) - -## Version 0.3.1 - -### Bug fixes - -- Don't check if path exists if param is not true ([#74](https://github.com/nextflow-io/nf-validation/pull/74)) -- Don't validate a file if the parameter evaluates to false ([#75](https://github.com/nextflow-io/nf-validation/pull/75)) - -## Version 0.3.0 - -### New features - -- Check that a sample sheet doesn't have duplicated entries by default. 
Can be disabled with `--validationSkipDuplicateCheck` ([#72](https://github.com/nextflow-io/nf-validation/pull/72)) - -### Bug fixes - -- Only validate a path if it is not null ([#50](https://github.com/nextflow-io/nf-validation/pull/50)) -- Only validate a file with a schema if the file path is provided ([#51](https://github.com/nextflow-io/nf-validation/pull/51)) -- Handle errors when sample sheet not provided or doesn't have a schema ([#56](https://github.com/nextflow-io/nf-validation/pull/56)) -- Silently ignore samplesheet fields that are not defined in samplesheet schema ([#59](https://github.com/nextflow-io/nf-validation/pull/59)) -- Correctly handle double-quoted fields containing commas in csv files by `.fromSamplesheet()` ([#63](https://github.com/nextflow-io/nf-validation/pull/63)) -- Print param name when path does not exist ([#65](https://github.com/nextflow-io/nf-validation/pull/65)) -- Fix file or directory does not exist error not printed when it was the only error in a samplesheet ([#65](https://github.com/nextflow-io/nf-validation/pull/65)) -- Do not return parameter in summary if it has no default in the schema and is set to 'false' ([#66](https://github.com/nextflow-io/nf-validation/pull/66)) -- Skip the validation of a file if the path is an empty string and improve error message when the path is invalid ([#69](https://github.com/nextflow-io/nf-validation/pull/69)) - -### Deprecated - -- The meta map of input channels is not an ImmutableMap anymore ([#68](https://github.com/nextflow-io/nf-validation/pull/68)). Reason: [Issue #52](https://github.com/nextflow-io/nf-validation/issues/52) - -## Version 0.2.1 - -### Bug fixes - -- Fixed a bug where `immutable_meta` option in `fromSamplesheet()` wasn't working when using `validateParameters()` first. (@nvnieuwk) - -## Version 0.2.0 - -### New features - -- Added a new [documentation site](https://nextflow-io.github.io/nf-validation/). 
(@ewels and @mashehu) -- Removed the `file-path-exists`, `directory-path-exists` and `path-exists` and added a [`exists`](https://nextflow-io.github.io/nf-validation/nextflow_schema/nextflow_schema_specification/#exists) parameter to the schema. (@mirpedrol) -- New [`errorMessage`](https://nextflow-io.github.io/nf-validation/nextflow_schema/nextflow_schema_specification/#errormessage) parameter for the schema which can be used to create custom error messages. (@mirpedrol) -- Samplesheet validation now happens in `validateParameters()` using the schema specified by the `schema` parameter in the parameters schema. (@mirpedrol) - -### Improvements - -- The `meta` maps are now immutable by default, see [`ImmutableMap`](https://nextflow-io.github.io/nf-validation/samplesheets/immutable_map/) for more info (@nvnieuwk) -- `validateAndConvertSamplesheet()` has been renamed to `fromSamplesheet()` -- Refactor `--schema_ignore_params` to `--validationSchemaIgnoreParams` - -### Bug fixes - -- Fixed a bug where an empty meta map would be created when no meta values are in the samplesheet schema. (@nvnieuwk) - -## Version 0.1.0 - -Initial release. diff --git a/README.md b/README.md index 8b6b13ac..1f0f432d 100644 --- a/README.md +++ b/README.md @@ -50,9 +50,8 @@ validateParameters() // Print summary of supplied parameters log.info paramsSummaryLog(workflow) -// Create a new channel of metadata from a sample sheet -// NB: `input` corresponds to `params.input` and associated sample sheet schema -ch_input = Channel.fromSamplesheet("input") +// Create a new channel of metadata from a sample sheet passed to the pipeline through the --input parameter +ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") ``` ## Dependencies @@ -62,7 +61,7 @@ ch_input = Channel.fromSamplesheet("input") ## Slack channel -There is a dedicated [nf-validation Slack channel](https://nfcore.slack.com/archives/C056RQB10LU) in the [Nextflow Slack workspace](nextflow.slack.com). 
+There is a dedicated [nf-validation Slack channel](https://nfcore.slack.com/archives/C056RQB10LU) in the [Nextflow Slack workspace](https://nextflow.slack.com). ## Credits diff --git a/docs/background.md b/docs/background.md index c0091c6c..95dc61bf 100644 --- a/docs/background.md +++ b/docs/background.md @@ -15,3 +15,5 @@ In addition to config params, a common best-practice for pipelines is to use a " Nextflow itself does not provide functionality to validate config parameters or parsed sample sheets. To bridge this gap, we developed code within the [nf-core community](https://nf-co.re/) to allow pipelines to work with a standard `nextflow_schema.json` file, written using the [JSON Schema](https://json-schema.org/) format. The file allows strict typing of parameter variables and inclusion of validation rules. The nf-schema plugin moves this code out of the nf-core template into a stand-alone package, to make it easier to use for the wider Nextflow community. It also incorporates a number of new features, such as native Groovy sample sheet validation. + +Earlier versions of the plugin can be found in the [nf-validation](https://github.com/nextflow-io/nf-validation) repository and can still be used in the pipeline. However, the nf-validation plugin is no longer supported and all development has been moved to nf-schema. diff --git a/docs/migration_guide.md b/docs/migration_guide.md index 09d74ac4..0db3f9f9 100644 --- a/docs/migration_guide.md +++ b/docs/migration_guide.md @@ -1,21 +1,22 @@ --- title: Migration guide -description: Guide to migrate pipelines using nf-schema pre v2.0.0 to after v2.0.0 +description: Guide to migrate pipelines from nf-validation to nf-schema hide: - toc --- # Migration guide -This guide is intended to help you migrate your pipeline from older versions of the plugin to version 2.0.0 and later. +This guide is intended to help you migrate your pipeline from [nf-validation](https://github.com/nextflow-io/nf-validation) to nf-schema. 
## Major changes in the plugin -Following list shows the major breaking changes introduced in version 2.0.0: +The following list shows the major breaking changes introduced in nf-schema: 1. The JSON schema draft has been updated from `draft-07` to `draft-2020-12`. See [JSON Schema draft 2020-12 release notes](https://json-schema.org/draft/2020-12/release-notes) and [JSON schema draft 2019-09 release notes](https://json-schema.org/draft/2019-09/release-notes) for more information. -2. The `unique` keyword for samplesheet schemas has been removed. Please use [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) or [`uniqueEntries`](nextflow_schema/nextflow_schema_specification.md#uniqueentries) now instead. -3. The `dependentRequired` keyword now works as it's supposed to work in JSON schema. See [`dependentRequired`](https://json-schema.org/understanding-json-schema/reference/conditionals#dependentRequired) for more information +2. The `fromSamplesheet` channel factory has been converted to a channel operator. See [updating `fromSamplesheet`](#updating-fromsamplesheet) for more information. +3. The `unique` keyword for samplesheet schemas has been removed. Please use [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) or [`uniqueEntries`](nextflow_schema/nextflow_schema_specification.md#uniqueentries) now instead. +4. The `dependentRequired` keyword now works as it's supposed to work in JSON schema. See [`dependentRequired`](https://json-schema.org/understanding-json-schema/reference/conditionals#dependentRequired) for more information. A full list of changes can be found in the [changelog](https://github.com/nextflow-io/nf-schema/blob/master/CHANGELOG.md). @@ -31,9 +32,27 @@ This will replace the old schema draft specification (`draft-07`) by the new one !!! note - Repeat this command for every JSON schema you use in your pipeline. e.g. 
for the default samplesheet schema: + Repeat this command for every JSON schema you use in your pipeline. e.g. for the default samplesheet schema in nf-core pipelines: `bash sed -i -e 's/http:\/\/json-schema.org\/draft-07\/schema/https:\/\/json-schema.org\/draft\/2020-12\/schema/g' -e 's/definitions/defs/g' assets/schema_input.json ` +Next, you should update the `.fromSamplesheet` channel factory to the channel operator. The following tabs show the difference between the versions: + +=== "nf-validation" + + ```groovy + Channel.fromSamplesheet("input") + ``` + +=== "nf-schema" + + ```groovy + Channel.of(params.input).fromSamplesheet("path/to/samplesheet/schema") + ``` + +!!! note + + This change was necessary to make it possible for pipelines to be used as pluggable workflows. This also enables the validation and conversion of files generated by the pipeline. + If you are using any special features in your schemas, you will need to update your schemas manually. Please refer to the [JSON Schema draft 2020-12 release notes](https://json-schema.org/draft/2020-12/release-notes) and [JSON schema draft 2019-09 release notes](https://json-schema.org/draft/2019-09/release-notes) for more information. 
However here are some guides to the more common migration patterns: @@ -44,7 +63,7 @@ When you use `unique` in your schemas, you should update it to use `uniqueItems` If you used the `unique:true` field, you should update it to use `uniqueItems` like this: -=== "Before v2.0" +=== "nf-validation" ```json hl_lines="9" { @@ -62,7 +81,7 @@ If you used the `unique:true` field, you should update it to use `uniqueItems` l } ``` -=== "After v2.0" +=== "nf-schema" ```json hl_lines="12" { @@ -82,7 +101,7 @@ If you used the `unique:true` field, you should update it to use `uniqueItems` l If you used the `unique: ["field1", "field2"]` field, you should update it to use `uniqueEntries` like this: -=== "Before v2.0" +=== "nf-validation" ```json hl_lines="9" { @@ -100,7 +119,7 @@ If you used the `unique: ["field1", "field2"]` field, you should update it to us } ``` -=== "After v2.0" +=== "nf-schema" ```json hl_lines="12" { @@ -122,7 +141,7 @@ If you used the `unique: ["field1", "field2"]` field, you should update it to us When you use `dependentRequired` in your schemas, you should update it like this: -=== "Before v2.0" +=== "nf-validation" ```json hl_lines="12" { @@ -142,7 +161,7 @@ When you use `dependentRequired` in your schemas, you should update it like this } ``` -=== "After v2.0" +=== "nf-schema" ```json hl_lines="14 15 16" { diff --git a/docs/nextflow_schema/create_schema.md b/docs/nextflow_schema/create_schema.md index 2b248e5e..5fd9ea57 100644 --- a/docs/nextflow_schema/create_schema.md +++ b/docs/nextflow_schema/create_schema.md @@ -76,4 +76,6 @@ This web interface is where you should add detail to your schema, customising th There is currently no tooling to help you write sample sheet schema :anguished: + You can find an example in [Example sample sheet schema](sample_sheet_schema_examples.md) + Watch this space.. 
diff --git a/docs/samplesheets/examples.md b/docs/samplesheets/examples.md index 170bea9a..88c7a1c5 100644 --- a/docs/samplesheets/examples.md +++ b/docs/samplesheets/examples.md @@ -7,7 +7,7 @@ description: Examples of advanced sample sheet creation techniques. ## Introduction -Understanding channel structure and manipulation is critical for getting the most out of Nextflow. nf-schema helps initialise your channels from the text inputs to get you started, but further work might be required to fit your exact use case. In this page we run through some common cases for transforming the output of `.fromSamplesheet`. +Understanding channel structure and manipulation is critical for getting the most out of Nextflow. nf-schema helps initialise your channels from the text inputs to get you started, but further work might be required to fit your exact use case. In this page we run through some common cases for transforming the output of `.fromSamplesheet()`. ### Glossary @@ -104,7 +104,7 @@ val(meta), path(fastq_1), path(fastq_2) To manipulate each item within a channel, you should use the [Nextflow `.map()` operator](https://www.nextflow.io/docs/latest/operator.html#map). This will apply a function to each element of the channel in turn. Here, we convert the flat tuple into a tuple composed of a meta and a list of FASTQ files: ```groovy -Channel.fromSamplesheet("input") +Channel.of(params.input).fromSamplesheet("path/to/json/schema") .map { meta, fastq_1, fastq_2 -> tuple(meta, [ fastq_1, fastq_2 ]) } .set { input } @@ -122,7 +122,7 @@ ZCAT_FASTQS(input) For example, to remove the BED file from the channel created above, we could not return it from the map. 
Note the absence of the `bed` item in the return of the closure below: ```groovy -Channel.fromSamplesheet("input") +Channel.of(params.input).fromSamplesheet("path/to/json/schema") .map { meta, fastq_1, fastq_2, bed -> tuple(meta, fastq_1, fastq_2) } .set { input } @@ -136,7 +136,7 @@ In this way you can drop items from a channel. We could perform this twice to create one channel containing the FASTQs and one containing the BED files, however Nextflow has a native operator to separate channels called [`.multiMap()`](https://www.nextflow.io/docs/latest/operator.html#multimap). Here, we separate the FASTQs and BEDs into two separate channels using `multiMap`. Note, the channels are both contained in `input` and accessed as an attribute using dot notation: ```groovy -Channel.fromSamplesheet("input") +Channel.of(params.input).fromSamplesheet("path/to/json/schema") .multiMap { meta, fastq_1, fastq_2, bed -> fastq: tuple(meta, fastq_1, fastq_2) bed: tuple(meta, bed) @@ -163,7 +163,7 @@ This example shows a channel which can have entries for WES or WGS data. 
WES dat // Channel with four elements - see docs for examples params.input = "samplesheet.csv" -Channel.fromSamplesheet("input") +Channel.of(params.input).fromSamplesheet("path/to/json/schema") .branch { meta, fastq_1, fastq_2, bed -> // If BED does not exist WGS: !bed @@ -211,7 +211,7 @@ This example contains a channel where multiple samples can be in the same family params.input = "sample sheet.csv" -Channel.fromSamplesheet("input") +Channel.of(params.input).fromSamplesheet("path/to/json/schema") .tap { ch_raw } // Create a copy of the original channel .map { meta, txt -> [ meta.family ] } // Isolate the value to count on .reduce([:]) { counts, family -> // Creates a map like this: [family1:2, family2:1] diff --git a/docs/samplesheets/fromSamplesheet.md b/docs/samplesheets/fromSamplesheet.md index 6a4f56e3..9f21eb54 100644 --- a/docs/samplesheets/fromSamplesheet.md +++ b/docs/samplesheets/fromSamplesheet.md @@ -1,36 +1,19 @@ --- title: Create a channel -description: Channel factory to create a channel from a sample sheet. +description: Channel operator to create a channel from a sample sheet. --- # Create a channel from a sample sheet ## `fromSamplesheet` -This function validates and converts a sample sheet to a ready-to-use Nextflow channel. This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)). +This channel operator validates and converts a sample sheet to ready-to-use channel entries. This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)). -The function has one mandatory argument: the name of the parameter which specifies the input sample sheet. The parameter specified must have the format `file-path` and include additional field `schema`: +The operator has one mandatory argument: the path of the JSON schema file corresponding to the samplesheet. 
This can be either a string with the relative path (from the root of the pipeline) or a file object of the schema. -```json hl_lines="4" -{ - "type": "string", - "format": "file-path", - "schema": "assets/foo_schema.json" -} -``` - -The path specified in the `schema` key determines the JSON used for validation of the sample sheet. - -When using the `.fromSamplesheet` channel factory, one optional arguments can be used: - -- `parameters_schema`: File name for the pipeline parameters schema. (Default: `nextflow_schema.json`) ```groovy -Channel.fromSamplesheet('input') -``` - -```groovy -Channel.fromSamplesheet('input', parameters_schema: 'custom_nextflow_schema.json') +Channel.of("path/to/samplesheet").fromSamplesheet("path/to/json/schema") ``` ## Basic example @@ -59,12 +42,6 @@ In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/ --8<-- "examples/fromSamplesheetBasic/pipeline/nextflow.config" ``` -=== "nextflow_schema.json" - - ```json hl_lines="19" - --8<-- "examples/fromSamplesheetBasic/pipeline/nextflow_schema.json" - ``` - === "assets/schema_input.json" ```json @@ -91,7 +68,7 @@ In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/ === "assets/schema_input.json" - ```json hl_lines="10 15 20 33" + ```json hl_lines="10 15 20 25" --8<-- "examples/fromSamplesheetOrder/pipeline/assets/schema_input.json" ``` @@ -107,12 +84,6 @@ In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/ --8<-- "examples/fromSamplesheetOrder/pipeline/nextflow.config" ``` -=== "nextflow_schema.json" - - ```json - --8<-- "examples/fromSamplesheetOrder/pipeline/nextflow_schema.json" - ``` - ## Channel with meta map In [this example](https://github.com/nextflow-io/nf-schema/tree/master/examples/fromSamplesheetMeta), we use the schema to mark two columns as meta fields. @@ -124,7 +95,7 @@ This returns a channel with a meta map. 
=== "assets/schema_input.json" - ```json hl_lines="14 38" + ```json hl_lines="14 30" --8<-- "examples/fromSamplesheetMeta/pipeline/assets/schema_input.json" ``` @@ -145,9 +116,3 @@ This returns a channel with a meta map. ```groovy --8<-- "examples/fromSamplesheetMeta/pipeline/nextflow.config" ``` - -=== "nextflow_schema.json" - - ```json - --8<-- "examples/fromSamplesheetMeta/pipeline/nextflow_schema.json" - ``` diff --git a/docs/samplesheets/samplesheetToList.md b/docs/samplesheets/samplesheetToList.md new file mode 100644 index 00000000..933e1f62 --- /dev/null +++ b/docs/samplesheets/samplesheetToList.md @@ -0,0 +1,24 @@ +--- +title: Create a list +description: Function to create a list from a sample sheet. +--- + +# Create a list from a sample sheet + +## `samplesheetToList` + +This function validates and converts a sample sheet to a list in a similar way to the [`fromSamplesheet`](./fromSamplesheet.md) channel operator. This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)). + +The function has two mandatory arguments: +1. The path to the samplesheet. +2. The path to the JSON schema file corresponding to the samplesheet. + +Each argument can be either a string with the relative path (from the root of the pipeline) or a file object. + +```groovy +samplesheetToList("path/to/samplesheet", "path/to/json/schema") +``` + +!!! note + + This function works very similarly to the `fromSamplesheet` channel operator. See the [`fromSamplesheet` examples](./fromSamplesheet.md#basic-example) for some examples of how to use this function. diff --git a/docs/samplesheets/validate_sample_sheet.md b/docs/samplesheets/validate_sample_sheet.md index 9fef1eaf..ccede6c0 100644 --- a/docs/samplesheets/validate_sample_sheet.md +++ b/docs/samplesheets/validate_sample_sheet.md @@ -26,4 +26,8 @@ See an example in the `input` field from the [example schema.json](https://raw.g } ``` +!!! 
warning + + The `.fromSamplesheet` channel operator and `samplesheetToList` also validate the files before converting them. If you convert the samplesheet, you should not add a schema to the parameter corresponding to the samplesheet to keep your pipeline as efficient as possible. + For more information about the sample sheet JSON schema refer to [sample sheet docs](../nextflow_schema/nextflow_schema_specification.md). diff --git a/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json b/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json index aa527ed5..56f6a959 100644 --- a/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json +++ b/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json @@ -19,16 +19,8 @@ }, "fastq_2": { "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" }, "strandedness": { "type": "string", diff --git a/examples/fromSamplesheetBasic/pipeline/main.nf b/examples/fromSamplesheetBasic/pipeline/main.nf index a02f1ac8..924be3da 100644 --- a/examples/fromSamplesheetBasic/pipeline/main.nf +++ b/examples/fromSamplesheetBasic/pipeline/main.nf @@ -1,5 +1,5 @@ include { fromSamplesheet } from 'plugin/nf-schema' -ch_input = Channel.fromSamplesheet("input") +ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") ch_input.view() diff --git a/examples/fromSamplesheetBasic/pipeline/nextflow_schema.json b/examples/fromSamplesheetBasic/pipeline/nextflow_schema.json deleted file mode 100644 index 6096ceb9..00000000 --- a/examples/fromSamplesheetBasic/pipeline/nextflow_schema.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": 
"https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", - "title": "nf-core/testpipeline pipeline parameters", - "description": "this is a test", - "type": "object", - "defs": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "schema": "assets/schema_input.json", - "pattern": "^\\S+\\.(csv|tsv|yaml|json)$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - } - } - } - }, - "allOf": [ - { - "$ref": "#/defs/input_output_options" - } - ] -} diff --git a/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json b/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json index 7a931a25..ab42363a 100644 --- a/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json +++ b/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json @@ -20,16 +20,8 @@ }, "fastq_2": { "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" }, "strandedness": { "type": "string", diff --git a/examples/fromSamplesheetMeta/pipeline/main.nf b/examples/fromSamplesheetMeta/pipeline/main.nf index a02f1ac8..924be3da 100644 --- a/examples/fromSamplesheetMeta/pipeline/main.nf +++ b/examples/fromSamplesheetMeta/pipeline/main.nf @@ -1,5 +1,5 @@ include { fromSamplesheet } from 'plugin/nf-schema' -ch_input = Channel.fromSamplesheet("input") +ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") ch_input.view() diff --git a/examples/fromSamplesheetMeta/pipeline/nextflow_schema.json b/examples/fromSamplesheetMeta/pipeline/nextflow_schema.json deleted file mode 100644 index 6096ceb9..00000000 --- a/examples/fromSamplesheetMeta/pipeline/nextflow_schema.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", - "title": "nf-core/testpipeline pipeline parameters", - "description": "this is a test", - "type": "object", - "defs": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", 
- "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "schema": "assets/schema_input.json", - "pattern": "^\\S+\\.(csv|tsv|yaml|json)$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - } - } - } - }, - "allOf": [ - { - "$ref": "#/defs/input_output_options" - } - ] -} diff --git a/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json b/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json index a51e24f6..fbbd703e 100644 --- a/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json +++ b/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json @@ -19,16 +19,8 @@ }, "fastq_2": { "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" }, "fastq_1": { "type": "string", diff --git a/examples/fromSamplesheetOrder/pipeline/main.nf b/examples/fromSamplesheetOrder/pipeline/main.nf index a02f1ac8..924be3da 100644 --- a/examples/fromSamplesheetOrder/pipeline/main.nf +++ 
b/examples/fromSamplesheetOrder/pipeline/main.nf @@ -1,5 +1,5 @@ include { fromSamplesheet } from 'plugin/nf-schema' -ch_input = Channel.fromSamplesheet("input") +ch_input = Channel.of(params.input).fromSamplesheet("assets/schema_input.json") ch_input.view() diff --git a/examples/fromSamplesheetOrder/pipeline/nextflow_schema.json b/examples/fromSamplesheetOrder/pipeline/nextflow_schema.json deleted file mode 100644 index 6096ceb9..00000000 --- a/examples/fromSamplesheetOrder/pipeline/nextflow_schema.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", - "title": "nf-core/testpipeline pipeline parameters", - "description": "this is a test", - "type": "object", - "defs": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "schema": "assets/schema_input.json", - "pattern": "^\\S+\\.(csv|tsv|yaml|json)$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - } - } - } - }, - "allOf": [ - { - "$ref": "#/defs/input_output_options" - } - ] -}