galaxyproject · bgruening · Jan 25, 2025 · Oct 9, 2024 · Oct 9, 2024 · Oct 9, 2024
diff --git a/workflows/scRNAseq/standard-scanpy/.dockstore.yml b/workflows/scRNAseq/standard-scanpy/.dockstore.yml
@@ -0,0 +1,17 @@
+version: 1.2
+workflows:
+- name: main
+  subclass: Galaxy
+  publish: true
+  primaryDescriptorPath: /Standard-scRNA-seq-with-Scanpy.ga
+  testParameterFiles:
+  - /Standard-scRNA-seq-with-Scanpy-tests.yml
+  authors:
+  - name: Pavankumar Videm
+    orcid: 0000-0002-5192-126X
+  - name: Hans-Rudolf Hotz
+    orcid: 0000-0002-2799-424X
+  - name: Mehmet Tekman
+    orcid: 0000-0002-4181-2676
+  - name: "Bérénice Batut"
+    orcid: 0000-0001-9852-1987
diff --git a/workflows/scRNAseq/standard-scanpy/CHANGELOG.md b/workflows/scRNAseq/standard-scanpy/CHANGELOG.md
@@ -0,0 +1,3 @@
+## [0.1] 2024-10-09
+
+First release.
diff --git a/workflows/scRNAseq/standard-scanpy/README.md b/workflows/scRNAseq/standard-scanpy/README.md
@@ -0,0 +1,23 @@
+# Standard scRNA-seq Workflow using Scanpy and Anndata
+
+## Input datasets
+
+- The workflow needs 4 files as input: the three data files listed below, plus a tabular workflow parameters file
+    - A single-cell count matrix file in Matrix Market Exchange format
+    - A cell barcodes file with a single barcode in each line. The barcodes should correspond to the cells in the matrix file
+    - A genes/feature tabular file with gene ids and gene symbols
+
+## Processing
+
+- The workflow creates an **Anndata** object from the given input files.
+- Quality control is performed: cells are filtered by the number of genes expressed, and cells with high mitochondrial content are removed.
+- Counts are then normalized and scaled.
+- PCA is used for dimensionality reduction and 50 PCs are computed. Various plots are generated to inspect the PCA and the PCA loadings, which helps in determining the number of PCs to keep for further analysis.
+- Clustering is performed by computing a neighborhood graph and then applying the **Louvain** algorithm. The neighborhood graph is embedded into UMAP and plotted.
+- Marker genes are identified using the **Wilcoxon rank-sum test**. Marker gene expression is visualized in various plots.
+- Optionally, louvain clusters can be annotated with cell types based on the marker genes.
+
+## Outputs
+
+- Final output is an Anndata object with annotations of louvain clusters.
+- Some informative plots from QC to end results
diff --git a/workflows/scRNAseq/standard-scanpy/Standard-scRNA-seq-with-Scanpy-tests.yml b/workflows/scRNAseq/standard-scanpy/Standard-scRNA-seq-with-Scanpy-tests.yml
@@ -0,0 +1,96 @@
+- doc: Test outline for Standard-scRNA-seq-with-Scanpy
+  job:
+    Workflow Params:
+      class: File
+      path: test-data/workflow_params.tabular
+      filetype: tabular
+    Barcodes:
+      class: File
+      location: https://zenodo.org/record/3581213/files/barcodes.tsv
+      filetype: txt
+    Genes:
+      class: File
+      location: https://zenodo.org/record/3581213/files/genes.tsv
+      filetype: tabular
+    Matrix:
+      class: File
+      location: https://zenodo.org/record/3581213/files/matrix.mtx
+      filetype: mtx
+    Input is from Cell Ranger v2 or earlier versions: true
+    Manually annotate celltypes?: true
+    Annotate louvain clusters with these cell types: CD4+ T, CD14+, B, CD8+ T, FCGR3A+,
+      NK, Dendritic, Megakaryocytes
+  outputs:
+    initial_anndata_general_info:
+      asserts:
+        has_text:
+          text: "AnnData object with n_obs × n_vars = 2700 × 32738"
+    pl_scatter_total_counts_vs_n_genes_by_counts:
+      path: test-data/pl_scatter_total_counts_vs_n_genes_by_counts.png
+      compare: sim_size
+    pl_highly_variable:
+      path: test-data/pl_highly_variable.png
+      compare: sim_size
+    pl_pca_loadings:
+      path: test-data/pl_pca_loadings.png
+      compare: sim_size
+    pl_pca_variance_ratio:
+      path: test-data/pl_pca_variance_ratio.png
+      compare: sim_size
+    pl_umap_louvain:
+      path: test-data/pl_umap_louvain.png
+      compare: sim_size
+    uns_rank_genes_groups_names_wilcoxon_test:
+      asserts:
+        has_line:
+          line: "LDHB\tLYZ\tCD74\tCCL5\tLST1\tNKG7\tHLA-DPA1\tPF4"
+    pl_rank_gene_groups_t_test_wilcoxon_test:
+      path: test-data/pl_rank_gene_groups_t_test_wilcoxon_test.png
+      compare: sim_size
+    final_anndata_general_info:
+      path: test-data/final_anndata_general_info.txt
+    cells_per_cluster:
+      asserts:
+      # List form: several `line:` entries under one `has_line:` are duplicate keys,
+      # so only the last was checked. NOTE(review): re-run to confirm all three pass.
+      - that: has_line
+        line: "0\t1162"
+      - that: has_line
+        line: "3\t311"
+      - that: has_line
+        line: "6\t34"
+    pl_scatter_n_genes_by_counts_vs_pct_mito:
+      path: test-data/pl_scatter_n_genes_by_counts_vs_pct_mito.png
+      compare: sim_size
+    pl_violin_initial:
+      path: test-data/pl_violin_initial.png
+      compare: sim_size
+    anndata_with_raw:
+      asserts:
+        has_h5_keys:
+          # One comma-separated string: repeated `keys:` entries are duplicate keys and
+          # only the last survives. NOTE(review): confirm every key exists in the output.
+          keys: "obs/log1p_total_counts,obs/total_counts_mito,var/mito,var/norm,uns/log1p"
+    pl_pca_overview_genes:
+      path: test-data/pl_pca_overview_genes.png
+      compare: sim_size
+    pl_rank_genes_heatmap:
+      path: test-data/pl_rank_genes_heatmap.png
+      compare: sim_size
+    anndata_out:
+      asserts:
+        has_h5_keys:
+          # Comma-separated for the same reason: a repeated `keys:` keeps only the last.
+          keys: "obs/louvain,var/highly_variable,uns/rank_genes_groups"
+    pl_umap_marker_genes:
+      path: test-data/pl_umap_marker_genes.png
+      compare: sim_size
+    pl_stacked_violin_marker_genes:
+      path: test-data/pl_stacked_violin_marker_genes.png
+      compare: sim_size
+    pl_violin_louvain:
+      path: test-data/pl_violin_louvain.png
+      compare: sim_size
+    pl_dotplot_marker_genes:
+      path: test-data/pl_dotplot_marker_genes.png
+      compare: sim_size