nf-core · RobJY · Jan 3, 2025 · Jan 10, 2025 · Jan 10, 2025
diff --git a/README.md b/README.md
@@ -63,7 +63,7 @@ channel_number,cycle_number,marker_name
 4,1,CD45RO
 ```
 
-Each row of the markersheet represents a single channel in the associated sample image. The first column `channel_number` is an identifier for the respective channel. The second column `cycle_number` corresponds to the cycle number of the image and it must match the `cycle_number` in the supplied samplesheet. The third column `marker_name` is the name of the marker for the given channel and cycle.
+Each row of the markersheet represents a single channel in the associated sample image. The first required column `channel_number` is an identifier for the respective channel. The second required column `cycle_number` corresponds to the cycle number of the image and it must match the `cycle_number` in the supplied samplesheet. The third required column `marker_name` is the name of the marker for the given channel and cycle. You may provide the markersheet columns in any order.
 
 > [!WARNING]
 > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).

diff --git a/docs/usage.md b/docs/usage.md
@@ -58,7 +58,7 @@ An [example one row per sample samplesheet](../assets/samplesheet_1_row_sample.c
 
 ## Markersheet input
 
-Each row of the markersheet represents a single channel in the associated sample image. The columns `channel_number`, `cycle_number` and `marker_name` are required.
+Each row of the markersheet represents a single channel in the associated sample image. The columns `channel_number`, `cycle_number` and `marker_name` are required. Columns may be in any order as long as the headers are present.
 
 ```csv
 channel_number,cycle_number,marker_name

diff --git a/subworkflows/local/utils_nfcore_mcmicro_pipeline/main.nf b/subworkflows/local/utils_nfcore_mcmicro_pipeline/main.nf
@@ -94,7 +94,7 @@ workflow PIPELINE_INITIALISATION {
 
     ch_markersheet = Channel.fromList(samplesheetToList(params.marker_sheet, "${projectDir}/assets/schema_marker.json"))
         .toList()
-        .map{ validateInputMarkersheet(it) }
+        .map{ validateInputMarkersheet(it, "${projectDir}/assets/schema_marker.json", params) }
         .dump(tag: 'ch_markersheet')
 
     ch_samplesheet.toList()
@@ -183,32 +183,58 @@ def validateInputParameters() {
 //
 // Validate channels from input samplesheet
 //
+def validateInputMarkersheet(markersheet_data, schema_file, params) {
+    def schema = new groovy.json.JsonSlurper().parse(new File(schema_file))
 
-def validateInputMarkersheet( markersheet_data ) {
+    def required_columns = schema.items.required ?: []
+    def properties = schema.items.properties
+
+    // Create a map of column names to their indices based on the order in the schema
+    def column_indices = properties.keySet().withIndex().collectEntries { column, index ->
+        [(column): index]
+    }
 
     def marker_name_list = []
     def channel_number_list = []
     def cycle_number_list = []
 
-    markersheet_data.each { row ->
-        def (channel_number, cycle_number, marker_name) = row
+    // Collect all marker data
+    def marker_data = markersheet_data.collect { row ->
+        [
+            marker_name: row[column_indices['marker_name']],
+            channel_number: row[column_indices['channel_number']] as int,
+            cycle_number: row[column_indices['cycle_number']] as int,
+            background: row[column_indices['background']] != [] ? row[column_indices['background']] : null,
+            exposure: row[column_indices['exposure']] != [] ? row[column_indices['exposure']] : null,
+            remove: column_indices.containsKey('remove') ? (row[column_indices['remove']] != [] ? row[column_indices['remove']] : null) : null
+        ]
+    }
+
+    // Validate basic requirements and collect lists
+    marker_data.each { row ->
+        // Check if all required columns are present and non-empty
+        required_columns.each { column ->
+            if (row[column] == null || row[column].toString().trim().isEmpty()) {
+                error("Missing required value in column '${column}' for marker '${row.marker_name}'.")
+            }
+        }
 
-        if (marker_name_list.contains(marker_name)) {
-            error("Duplicate marker name found in marker sheet!")
+        if (marker_name_list.contains(row.marker_name)) {
+            error("Duplicate marker name found: '${row.marker_name}'")
         } else {
-            marker_name_list.add(marker_name)
+            marker_name_list.add(row.marker_name)
         }
 
-        if (channel_number_list && (channel_number != channel_number_list[-1] && channel_number != channel_number_list[-1] + 1)) {
-            error("Channel_number cannot skip values and must be in order!")
+        if (channel_number_list && (row.channel_number != channel_number_list[-1] && row.channel_number != channel_number_list[-1] + 1)) {
+            error("Channel_number cannot skip values and must be in order! Error at marker '${row.marker_name}'")
         } else {
-            channel_number_list.add(channel_number)
+            channel_number_list.add(row.channel_number)
         }
 
-        if (cycle_number_list && (cycle_number != cycle_number_list[-1] && cycle_number != cycle_number_list[-1] + 1)) {
-            error("Cycle_number cannot skip values and must be in order!")
+        if (cycle_number_list && (row.cycle_number != cycle_number_list[-1] && row.cycle_number != cycle_number_list[-1] + 1)) {
+            error("Cycle_number cannot skip values and must be in order! Error at marker '${row.marker_name}'")
         } else {
-            cycle_number_list.add(cycle_number)
+            cycle_number_list.add(row.cycle_number)
         }
     }
 
@@ -219,26 +245,42 @@ def validateInputMarkersheet( markersheet_data ) {
         error("Duplicate [channel, cycle] pairs: ${dups}")
     }
 
-    // validate backsub columns if present
-    def exposure_list = markersheet_data.findResults{ _1, _2, _3, _4, _5, _6, exposure, _8, _9 -> exposure ?: null }
-    def background_list = markersheet_data.findResults{ _1, _2, _3, _4, _5, _6, _7, background, _9 -> background ?: null }
-    def remove_list = markersheet_data.findResults{ _1, _2, _3, _4, _5, _6, _7, _8, remove -> remove ?: null }
-
-    if (!background_list && (exposure_list || remove_list)) {
-        error("No values in background column, but values in either exposure or remove columns.  Must have background column values to perform background subtraction.")
-    } else if (background_list) {
-        inter_list = marker_name_list.intersect(background_list)
-        if (inter_list.size() != background_list.size()) {
-            outliers_list = background_list - inter_list
-            error('background column values must exist in the marker_name column. The following background column values do not exist in the marker_name column: ' + outliers_list)
+    // Validate backsub data
+    // Check if backsub columns are present
+    def backsub_columns = ['exposure', 'background', 'remove']
+    def has_backsub_columns = markersheet_data.any { row ->
+        backsub_columns.any { column ->
+            column_indices.containsKey(column) && row[column_indices[column]] && row[column_indices[column]] != []
         }
+    }
 
-        if (!exposure_list) {
-            error('You must have at least one value in the exposure column to perform background subtraction')
-        }
+    // Throw error if backsub = false but backsub columns are present
+    if (!params.backsub && has_backsub_columns) {
+        error("Error: exposure, background, or remove columns are present in the marker sheet, but params.backsub is set to false. Either remove these columns or set params.backsub to true.")
+    }
 
-        if (!remove_list) {
-            error ('You must have at least one value in the remove column to perform background subtraction')
+    def has_any_background = marker_data.any { it.background != null }
+
+    if (params.backsub && !has_any_background) {
+        error("Backsub is enabled, but all values in the background column are empty. No subtraction will occur. Either set params.backsub=false or specify how the channel subtraction should be performed.")
+    } else if (has_any_background) {
+        // Create a set of all markers used as background
+        def markers_used_as_background = marker_data.findAll { it.background != null }.collect { it.background }.toSet()
+
+        marker_data.each { row ->
+            if (row.background != null) {
+                if (row.exposure == null) {
+                    error("Missing exposure value for marker '${row.marker_name}' with background '${row.background}'.")
+                }
+                if (!marker_name_list.contains(row.background)) {
+                    error("Background value '${row.background}' specified for marker '${row.marker_name}' does not exist in the marker_name column.")
+                }
+            }
+
+            // Check if this marker is used as a background and ensure it has an exposure value
+            if (markers_used_as_background.contains(row.marker_name) && row.exposure == null) {
+                error("Marker '${row.marker_name}' is used as a background for another marker but does not have an exposure value.")
+            }
         }
     }