-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #223 from microbiomedata/222-write-metatranscripto…
…me-workflows-yaml Initial metatranscriptome workflow
- Loading branch information
Showing
2 changed files
with
388 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,380 @@ | ||
Workflows: | ||
- Name: Sequencing Noninterleaved | ||
Collection: omics_processing_set | ||
Enabled: True | ||
Analyte Category: Metatranscriptome | ||
Filter Output Objects: | ||
- Metagenome Raw Read 1 | ||
- Metagenome Raw Read 2 | ||
|
||
- Name: Sequencing Interleaved | ||
Collection: omics_processing_set | ||
Enabled: True | ||
Analyte Category: Metatranscriptome | ||
Filter Output Objects: | ||
- Metagenome Raw Reads | ||
|
||
- Name: Metatranscriptome Reads QC | ||
Type: nmdc:ReadQcAnalysisActivity | ||
Enabled: True | ||
Analyte Category: Metatranscriptome | ||
Git_repo: https://github.com/microbiomedata/metaT_ReadsQC | ||
Version: v0.0.3 | ||
WDL: rqcfilter.wdl | ||
Collection: read_qc_analysis_activity_set | ||
Filter Input Objects: | ||
- Metagenome Raw Reads | ||
Predecessors: | ||
- Sequencing | ||
- Sequencing Interleaved | ||
Input_prefix: nmdc_rqcfilter | ||
Inputs: | ||
input_files: do:Metagenome Raw Reads | ||
proj: "{activity_id}" | ||
Activity: | ||
name: "Read QC Activity for {id}" | ||
input_read_bases: "{outputs.stats.input_read_bases}" | ||
input_read_count: "{outputs.stats.input_read_count}" | ||
output_read_bases: "{outputs.stats.output_read_bases}" | ||
output_read_count: "{outputs.stats.output_read_count}" | ||
type: nmdc:ReadQcAnalysisActivity | ||
Outputs: | ||
- output: filtered_final | ||
name: Reads QC result fastq (clean data) | ||
data_object_type: Filtered Sequencing Reads | ||
description: "Reads QC for {id}" | ||
- output: filtered_stats_final | ||
name: Reads QC summary statistics | ||
data_object_type: QC Statistics | ||
description: "Reads QC summary for {id}" | ||
- output: rqc_info | ||
name: File containing read filtering information | ||
data_object_type: Read Filtering Info File | ||
description: "Read filtering info for {id}" | ||
- output: filtered_ribo_final | ||
name: Fastq file containing filtered ribosomal sequences | ||
data_object_type: rRNA Filtered Sequencing Reads | ||
description: "rRNA fastq for {id}" | ||
|
||
- Name: Metatranscriptome Reads QC Interleave | ||
Type: nmdc:ReadQcAnalysisActivity | ||
Enabled: True | ||
Analyte Category: Metatranscriptome | ||
Git_repo: https://github.com/microbiomedata/metaT_ReadsQC | ||
Version: v0.0.3 | ||
Collection: read_qc_analysis_activity_set | ||
WDL: interleave_rqcfilter.wdl | ||
Input_prefix: nmdc_rqcfilter | ||
Inputs: | ||
proj: "{activity_id}" | ||
input_fastq1: do:Metagenome Raw Read 1 | ||
input_fastq2: do:Metagenome Raw Read 2 | ||
Filter Input Objects: | ||
- Metagenome Raw Read 1 | ||
- Metagenome Raw Read 2 | ||
Predecessors: | ||
- Sequencing Noninterleaved | ||
Activity: | ||
name: "Read QC Activity for {id}" | ||
input_read_bases: "{outputs.stats.input_read_bases}" | ||
input_read_count: "{outputs.stats.input_read_count}" | ||
output_read_bases: "{outputs.stats.output_read_bases}" | ||
output_read_count: "{outputs.stats.output_read_count}" | ||
type: nmdc:ReadQcAnalysisActivity | ||
Outputs: | ||
- output: filtered_final | ||
name: Reads QC result fastq (clean data) | ||
data_object_type: Filtered Sequencing Reads | ||
description: "Reads QC for {id}" | ||
- output: filtered_stats_final | ||
name: Reads QC summary statistics | ||
data_object_type: QC Statistics | ||
description: "Reads QC summary for {id}" | ||
- output: rqc_info | ||
name: File containing read filtering information | ||
data_object_type: Read Filtering Info File | ||
description: "Read filtering info for {id}" | ||
- output: rrna_fastq_final #placeholder until https://github.com/microbiomedata/metaT_ReadsQC/issues/5 is resolved | ||
name: Fastq file containing filtered ribosomal sequences | ||
data_object_type: rRNA Filtered Sequencing Reads | ||
description: "rRNA fastq for {id}" | ||
|
||
- Name: Metatranscriptome Assembly | ||
Type: nmdc:MetatranscriptomeAssembly | ||
Enabled: True | ||
Analyte Category: Metatranscriptome | ||
Git_repo: https://github.com/microbiomedata/metaT_Assembly | ||
Version: v0.0.1 | ||
WDL: metaT_assembly.wdl | ||
Collection: metagenome_assembly_set | ||
Predecessors: | ||
- Metatranscriptome Reads QC | ||
- Metatranscriptome Reads QC Interleave | ||
Input_prefix: jgi_metaASM | ||
Inputs: | ||
input_files: do:Filtered Sequencing Reads | ||
proj: "{activity_id}" | ||
Activity: | ||
name: "Metatranscriptome Assembly Activity for {id}" | ||
type: nmdc:MetatranscriptomeAssembly | ||
asm_score: "{outputs.stats.asm_score}" | ||
contig_bp: "{outputs.stats.contig_bp}" | ||
contigs: "{outputs.stats.contigs}" | ||
ctg_l50: "{outputs.stats.ctg_l50}" | ||
ctg_l90: "{outputs.stats.ctg_l90}" | ||
ctg_logsum: "{outputs.stats.ctg_logsum}" | ||
ctg_max: "{outputs.stats.ctg_max}" | ||
ctg_n50: "{outputs.stats.ctg_n50}" | ||
ctg_n90: "{outputs.stats.ctg_n90}" | ||
ctg_powsum: "{outputs.stats.ctg_powsum}" | ||
gap_pct: "{outputs.stats.gap_pct}" | ||
gc_avg: "{outputs.stats.gc_avg}" | ||
gc_std: "{outputs.stats.gc_std}" | ||
Outputs: | ||
- output: final_contigs | ||
name: Final assembly contigs fasta | ||
data_object_type: Assembly Contigs | ||
description: "Assembly contigs for {id}" | ||
- output: final_cov | ||
name: Assembled contigs coverage information | ||
data_object_type: Assembly Coverage Stats | ||
description: "Coverage Stats for {id}" | ||
- output: final_bam | ||
name: Sorted bam file of reads mapping back to the final assembly | ||
data_object_type: Assembly Coverage BAM | ||
description: "Sorted Bam for {id}" | ||
- output: info_file | ||
name: File containing assembly info | ||
data_object_type: Assembly Info File | ||
description: "Assembly info for {id}" | ||
- output: final_bamidx | ||
name: Indexed bam file | ||
data_object_type: BAI File | ||
description: "Alignment index file for {id}" | ||
|
||
- Name: Metatranscriptome Annotation | ||
Type: nmdc:MetatranscriptomeAnnotationActivity | ||
Enabled: True | ||
Analyte Category: Metatranscriptome | ||
Git_repo: https://github.com/microbiomedata/mg_annotation | ||
Version: v1.1.0 | ||
WDL: annotation_full.wdl | ||
Collection: metatranscriptome_annotation_set | ||
Predecessors: | ||
- Metatranscriptome Assembly | ||
Input_prefix: annotation | ||
Inputs: | ||
input_file: do:Assembly Contigs | ||
imgap_project_id: "scaffold" | ||
proj: "{activity_id}" | ||
Activity: | ||
name: "Metatranscriptome Annotation Analysis Activity for {id}" | ||
type: nmdc:MetatranscriptomeAnnotationActivity | ||
Outputs: | ||
- output: proteins_faa | ||
data_object_type: Annotation Amino Acid FASTA | ||
description: FASTA Amino Acid File for {id} | ||
name: FASTA amino acid file for annotated proteins | ||
- output: structural_gff | ||
data_object_type: Structural Annotation GFF | ||
description: Structural Annotation for {id} | ||
name: GFF3 format file with structural annotations | ||
- output: functional_gff | ||
data_object_type: Functional Annotation GFF | ||
description: Functional Annotation for {id} | ||
name: GFF3 format file with functional annotations | ||
- output: ko_tsv | ||
data_object_type: Annotation KEGG Orthology | ||
description: KEGG Orthology for {id} | ||
name: Tab delimited file for KO annotation | ||
- output: ec_tsv | ||
data_object_type: Annotation Enzyme Commission | ||
description: EC Annotations for {id} | ||
name: Tab delimited file for EC annotation | ||
suffix: _ec.tsv | ||
- output: lineage_tsv | ||
data_object_type: Scaffold Lineage tsv | ||
description: Scaffold Lineage tsv for {id} | ||
name: Phylogeny at the scaffold level | ||
suffix: _scaffold_lineage.tsv | ||
- output: cog_gff | ||
data_object_type: Clusters of Orthologous Groups (COG) Annotation GFF | ||
description: COGs for {id} | ||
name: GFF3 format file with COGs | ||
- output: pfam_gff | ||
data_object_type: Pfam Annotation GFF | ||
description: Pfam Annotation for {id} | ||
name: GFF3 format file with Pfam | ||
- output: tigrfam_gff | ||
data_object_type: TIGRFam Annotation GFF | ||
description: TIGRFam for {id} | ||
name: GFF3 format file with TIGRfam | ||
- output: smart_gff | ||
data_object_type: SMART Annotation GFF | ||
description: SMART Annotations for {id} | ||
name: GFF3 format file with SMART | ||
- output: supfam_gff | ||
data_object_type: SUPERFam Annotation GFF | ||
description: SUPERFam Annotations for {id} | ||
name: GFF3 format file with SUPERFam | ||
- output: cath_funfam_gff | ||
data_object_type: CATH FunFams (Functional Families) Annotation GFF | ||
description: CATH FunFams for {id} | ||
name: GFF3 format file with CATH FunFams | ||
- output: crt_gff | ||
data_object_type: CRT Annotation GFF | ||
description: CRT Annotations for {id} | ||
name: GFF3 format file with CRT | ||
- output: genemark_gff | ||
data_object_type: Genemark Annotation GFF | ||
description: Genemark Annotations for {id} | ||
name: GFF3 format file with Genemark | ||
- output: prodigal_gff | ||
data_object_type: Prodigal Annotation GFF | ||
description: Prodigal Annotations {id} | ||
name: GFF3 format file with Prodigal | ||
- output: trna_gff | ||
data_object_type: TRNA Annotation GFF | ||
description: TRNA Annotations {id} | ||
name: GFF3 format file with TRNA | ||
- output: final_rfam_gff | ||
data_object_type: RFAM Annotation GFF | ||
description: RFAM Annotations for {id} | ||
name: GFF3 format file with RFAM | ||
- output: ko_ec_gff | ||
data_object_type: KO_EC Annotation GFF | ||
description: KO_EC Annotations for {id} | ||
name: GFF3 format file with KO_EC | ||
- output: product_names_tsv | ||
data_object_type: Product Names | ||
description: Product names for {id} | ||
name: Product names file | ||
- output: gene_phylogeny_tsv | ||
data_object_type: Gene Phylogeny tsv | ||
description: Gene Phylogeny for {id} | ||
name: Gene Phylogeny file | ||
- output: crt_crisprs | ||
data_object_type: Crispr Terms | ||
description: Crispr Terms for {id} | ||
name: Crispr Terms | ||
- output: stats_tsv | ||
data_object_type: Annotation Statistics | ||
description: Annotation Stats for {id} | ||
name: Annotation statistics report | ||
- output: map_file | ||
data_object_type: Contig Mapping File | ||
description: Contig mappings file for {id} | ||
name: Contig mappings between contigs and scaffolds | ||
- output: imgap_version | ||
data_object_type: Annotation Info File | ||
description: Annotation info for {id} | ||
name: File containing annotation info | ||
- output: renamed_fasta | ||
data_object_type: Assembly Contigs | ||
description: Assembly contigs (remapped) for {id} | ||
name: File containing contigs with annotation headers | ||
|
||
|
||
- Name: Expression Analysis Antisense | ||
Type: nmdc:MetatranscriptomeExpressionAnalysis | ||
Enabled: True | ||
Analyte Category: Metatranscriptome | ||
Git_repo: https://github.com/microbiomedata/metaT_ReadCounts | ||
Version: v0.0.1 | ||
WDL: readcount.wdl | ||
Collection: metatranscriptome_expression_analysis_set | ||
Predecessors: | ||
- Metatranscriptome Annotation | ||
Input_prefix: nmdc_expression | ||
Inputs: | ||
gff_file: do:Functional Annotation GFF | ||
map: do:Contig Mapping File | ||
bam: do:Assembly Coverage BAM | ||
rna_type: "aRNA" | ||
proj: "{activity_id}" | ||
Activity: | ||
name: "Metatranscriptome Expression Analysis for {id}" | ||
type: nmdc:MetatranscriptomeExpressionAnalysis | ||
Outputs: | ||
- output: count_table | ||
data_object_type: Metatranscriptome Expression | ||
description: Expression counts for {id} | ||
name: Metatranscriptome expression table | ||
- output: count_ig | ||
data_object_type: Metatranscriptome Expression Intergenic | ||
description: Metatranscriptome expression intergenic regions for {id} | ||
name: Metatranscriptome expression intergenic table | ||
optional: true | ||
- output: info_file | ||
data_object_type: Metatranscriptome Expression Info File | ||
description: Expression info for {id} | ||
name: Metatranscriptome Expression Info File | ||
|
||
- Name: Expression Analysis Sense | ||
Type: nmdc:MetatranscriptomeExpressionAnalysis | ||
Enabled: True | ||
Analyte Category: Metatranscriptome | ||
Git_repo: https://github.com/microbiomedata/metaT_ReadCounts | ||
Version: v0.0.1 | ||
WDL: readcount.wdl | ||
Collection: metatranscriptome_expression_analysis_set | ||
Predecessors: | ||
- Metatranscriptome Annotation | ||
Input_prefix: nmdc_expression | ||
Inputs: | ||
gff_file: do:Functional Annotation GFF | ||
map: do:Contig Mapping File | ||
bam: do:Assembly Coverage BAM | ||
proj: "{activity_id}" | ||
Activity: | ||
name: "Metatranscriptome Expression Analysis for {id}" | ||
type: nmdc:MetatranscriptomeExpressionAnalysis | ||
Outputs: | ||
- output: count_table | ||
data_object_type: Metatranscriptome Expression | ||
description: Expression counts for {id} | ||
name: Metatranscriptome expression table | ||
- output: count_ig | ||
data_object_type: Metatranscriptome Expression Intergenic | ||
description: Metatranscriptome expression intergenic regions for {id} | ||
name: Metatranscriptome expression intergenic table | ||
optional: true | ||
- output: info_file | ||
data_object_type: Metatranscriptome Expression Info File | ||
description: Expression info for {id} | ||
name: Metatranscriptome Expression Info File | ||
|
||
- Name: Expression Analysis Nonstranded | ||
Type: nmdc:MetatranscriptomeExpressionAnalysis | ||
Enabled: True | ||
Analyte Category: Metatranscriptome | ||
Git_repo: https://github.com/microbiomedata/metaT_ReadCounts | ||
Version: v0.0.1 | ||
WDL: readcount.wdl | ||
Collection: metatranscriptome_expression_analysis_set | ||
Predecessors: | ||
- Metatranscriptome Annotation | ||
Input_prefix: nmdc_expression | ||
Inputs: | ||
gff_file: do:Functional Annotation GFF | ||
map: do:Contig Mapping File | ||
bam: do:Assembly Coverage BAM | ||
rna_type: "non_stranded_RNA" | ||
proj: "{activity_id}" | ||
Activity: | ||
name: "Metatranscriptome Expression Analysis for {id}" | ||
type: nmdc:MetatranscriptomeExpressionAnalysis | ||
Outputs: | ||
- output: count_table | ||
data_object_type: Metatranscriptome Expression | ||
description: Expression counts for {id} | ||
name: Metatranscriptome expression table | ||
- output: count_ig | ||
data_object_type: Metatranscriptome Expression Intergenic | ||
description: Metatranscriptome expression intergenic regions for {id} | ||
name: Metatranscriptome expression intergenic table | ||
optional: true | ||
- output: info_file | ||
data_object_type: Metatranscriptome Expression Info File | ||
description: Expression info for {id} | ||
name: Metatranscriptome Expression Info File |
Oops, something went wrong.