forked from nextgenusfs/genome_scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
proteome_compare.py
executable file
·82 lines (71 loc) · 3.3 KB
/
proteome_compare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
import sys, subprocess
from natsort import natsorted
#script to run global alignment using usearch8 and then parse the output
if len(sys.argv) < 2:
print "Usage: proteome_compare.py query.faa reference.faa singletons.output.txt okay.hits.txt"
sys.exit(1)
def compare(input):
global singletons
global good
singletons = []
good = []
cantsithereseatstaken = []
with open(input, 'rU') as input:
identical = 0; great = 0; okay = 0; partial = 0; unique = 0; others = 0; bad = 0;
for line in input:
col = line.split('\t')
if col[1] == '*':
singletons.append([col[0]])
unique += 1
continue
if float(col[2]) == 100.0 and int(col[3]) == int(col[4]) and int(col[5]) == int(col[6]):
identical += 1
if not col[1] in cantsithereseatstaken:
good.append([col[0],col[1]])
cantsithereseatstaken.append(col[1])
continue
elif float(col[2]) >= 95.0 and int(col[5]) >= 95 and int(col[6]) >= 95:
great += 1
if not col[1] in cantsithereseatstaken:
good.append([col[0],col[1]])
cantsithereseatstaken.append(col[1])
continue
elif float(col[2]) >= 80.0 and int(col[5]) >= 80 and int(col[6]) >= 80:
okay += 1
if not col[1] in cantsithereseatstaken:
good.append([col[0],col[1]])
cantsithereseatstaken.append(col[1])
continue
elif float(col[2]) >= 60.0 and int(col[5]) >= 60 and int(col[6]) >= 60:
others +=1
if not col[1] in cantsithereseatstaken:
good.append([col[0],col[1]])
cantsithereseatstaken.append(col[1])
continue
else:
bad += 1
singletons.append([col[0]])
return (identical, great, okay, others, unique, bad)
subprocess.call(['usearch8', '-usearch_global', sys.argv[1], '-db', sys.argv[2], '-id', '0.6', '-userout', 'prot_compare1.txt', '-userfields', 'query+target+id+ql+tl+qcov+tcov', '-output_no_hits', '-top_hit_only', '-strand', 'plus', '-notrunclabels'])
subprocess.call(['usearch8', '-db', sys.argv[1], '-usearch_global', sys.argv[2], '-id', '0.6', '-userout', 'prot_compare2.txt', '-userfields', 'query+target+id+ql+tl+qcov+tcov', '-output_no_hits', '-top_hit_only', '-strand', 'plus', '-notrunclabels'])
#parse output
qvsr = compare('prot_compare1.txt')
with open(sys.argv[3], 'w') as output:
for hit in singletons:
output.write("%s\n" % (hit[0]))
with open(sys.argv[4], 'w') as output:
for hit in good:
output.write("%s\t%s\n" % (hit[0], hit[1]))
rvsq = compare('prot_compare2.txt')
print"--------------------------------"
print"Query vs Ref:"
print"--------------------------------"
print"Identical (100%% id,cov): %i" % (qvsr[0])
print"Great (>95%% id,cov): %i" % (qvsr[1])
print"Good (>80%% id,cov): %i" % (qvsr[2])
print"Others (>60%% id,cov): %i" % (qvsr[3])
print"Bad (>60%%, cov < 60%%): %i" % (qvsr[5])
print"Unique (to the query): %i" % (qvsr[4])
print"Missing (unique to ref): %i" % (rvsq[4])
print"--------------------------------"