forked from microbiomedata/ReadsQC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rqcfilter.wdl
112 lines (101 loc) · 3.46 KB
/
rqcfilter.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Workflow: scatter the rqcfilter task over every input file, then pass the
# collected filterStats paths to make_output, whose echoed fastq paths become
# this workflow's output.
workflow jgi_rqcfilter {
    # ---- inputs ----
    Array[File] input_files
    String? outdir
    String? memory
    String? threads
    String database = "/refdata"
    String bbtools_container = "microbiomedata/bbtools:38.44"

    # One rqcfilter call per input file.
    scatter (reads in input_files) {
        call rqcfilter {
            input:
                input_file = reads,
                container = bbtools_container,
                database = database,
                memory = memory,
                threads = threads
        }
    }

    # Because of the scatter, rqcfilter.stat is implicitly an Array here.
    # NOTE(review): outdir is optional in this workflow but make_output
    # declares it as a plain String; confirm what happens when it is omitted.
    call make_output {
        input:
            outdir = outdir,
            rqcfilter_output = rqcfilter.stat
    }

    output {
        Array[File] clean_fastq_files = make_output.fastq_files
    }

    parameter_meta {
        input_files: "illumina paired-end interleaved fastq files"
        outdir: "The final output directory path"
        database: "database path to RQCFilterData directory"
        clean_fastq_files: "after QC fastq files"
        memory: "optional for jvm memory for bbtools, ex: 32G"
        threads: "optional for jvm threads for bbtools ex: 16"
    }

    meta {
        author: "Chienchi Lo, B10, LANL"
        email: "[email protected]"
        version: "1.0.0"
    }
}
# Task: run BBTools rqcfilter2.sh on one input file, writing results under
# ./filtered, then convert filtered/filterStats.txt into a JSON document.
#
# Inputs:
#   input_file - reads file handed to rqcfilter2.sh via in=
#   container  - docker image used for this task
#   database   - forwarded only as the custom runtime attribute "database";
#                the command itself reads /databases/RQCFilterData
#                (NOTE(review): presumably the backend mounts `database` at
#                /databases - confirm against the backend configuration)
#   memory     - optional JVM heap size for -Xmx (falls back to 105G)
#   threads    - optional thread count; when absent the count of "model name"
#                lines in /proc/cpuinfo is used
#
# Outputs:
#   stdout / stderr - tee'd logs of rqcfilter2.sh
#   stat / stat2    - filterStats.txt / filterStats2.txt from ./filtered
#   stat_json       - JSON rendering of filterStats.txt produced below
task rqcfilter {
    File input_file
    String container
    String database
    String? memory
    String? threads
    String filename_outlog="stdout.log"
    String filename_errlog="stderr.log"
    String filename_stat="filtered/filterStats.txt"
    String filename_stat2="filtered/filterStats2.txt"
    String filename_stat_json="filtered/filterStats.json"
    # Left as a shell command substitution: it is evaluated when the command
    # script runs, not when the workflow is parsed.
    String system_cpu="$(grep \"model name\" /proc/cpuinfo | wc -l)"
    String jvm_threads=select_first([threads,system_cpu])

    runtime {
        docker: container
        mem: memory
        database: database
    }

    command<<<
        export TIME="time result\ncmd:%C\nreal %es\nuser %Us \nsys %Ss \nmemory:%MKB \ncpu %P"
        set -eo pipefail
        rqcfilter2.sh -Xmx${default="105G" memory} threads=${jvm_threads} jni=t in=${input_file} path=filtered rna=f trimfragadapter=t qtrim=r trimq=0 maxns=3 maq=3 minlen=51 mlf=0.33 phix=t removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch kapa=t clumpify=t tmpdir= barcodefilter=f trimpolyg=5 usejni=f rqcfilterdata=/databases/RQCFilterData > >(tee -a ${filename_outlog}) 2> >(tee -a ${filename_errlog} >&2)
        python <<CODE
        import json
        stats = dict()
        # Parse key=value lines; keys containing 'Ratio' are floats, the rest ints.
        with open("${filename_stat}", 'r') as stat_file:
            for line in stat_file:
                line = line.rstrip()
                if not line:
                    continue
                # Split on the first '=' only so a value containing '=' stays intact.
                key, value = line.split('=', 1)
                stats[key] = float(value) if 'Ratio' in key else int(value)

        with open("${filename_stat_json}", 'w') as outfile:
            json.dump(stats, outfile)
        CODE
    >>>

    output {
        File stdout = filename_outlog
        File stderr = filename_errlog
        File stat = filename_stat
        File stat2 = filename_stat2
        # Expose the JSON built by the command; it was previously generated
        # but never declared as an output.
        File stat_json = filename_stat_json
    }
}
# Task: move each rqcfilter result directory into <outdir>/<prefix>, where
# <prefix> is the name of the directory's *.anqdpht.fastq.gz file without that
# suffix, and print the relocated fastq path (one per line) on stdout.
#
# Inputs:
#   outdir           - destination root directory
#   rqcfilter_output - paths of files inside each rqcfilter result directory;
#                      only their parent directory is used
#
# Outputs:
#   fastq_files - the lines printed on stdout, i.e. the relocated fastq paths
#
# Notes:
#   - Shell variables are written as $name (no braces) because this command
#     block uses brace delimiters, where dollar-brace is WDL interpolation.
#   - Entries of rqcfilter_output are space-joined, so paths that themselves
#     contain whitespace are still not supported by the loop.
task make_output {
    String outdir
    Array[String] rqcfilter_output

    command {
        # Abort on the first failure so a path is never reported on stdout
        # for files that were not actually moved.
        set -euo pipefail
        for stat_file in ${sep=' ' rqcfilter_output}
        do
            rqcfilter_path=$(dirname "$stat_file")
            prefix=$(basename "$rqcfilter_path"/*.anqdpht.fastq.gz .anqdpht.fastq.gz)
            # An unmatched glob is left literal and would yield a bogus prefix;
            # verify the fastq really exists before touching anything.
            if [ ! -e "$rqcfilter_path/$prefix.anqdpht.fastq.gz" ]
            then
                echo "no .anqdpht.fastq.gz file found in $rqcfilter_path" >&2
                exit 1
            fi
            mkdir -p "${outdir}/$prefix"
            mv -f "$rqcfilter_path"/* "${outdir}/$prefix"
            echo "${outdir}/$prefix/$prefix.anqdpht.fastq.gz"
        done
        chmod 764 -R "${outdir}"
    }

    runtime {
        mem: "1 GiB"
        cpu: 1
    }

    output {
        Array[String] fastq_files = read_lines(stdout())
    }
}