forked from xiezhq/ISEScan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
isescan.py
87 lines (69 loc) · 2.57 KB
/
isescan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ISEScan version
version = '1.7.2.3'
import argparse
import os
import sys
import datetime
import isPredict
def isPredictSingle(args):
    """Run the ISEScan prediction pipeline on a single sequence file.

    ``isPredict.isPredict`` is called with the path of a list file, so a
    one-line list file naming ``args['seqfile']`` is written to the current
    working directory, handed to the pipeline, and removed afterwards.

    Args:
        args: Mapping with the keys ``seqfile`` (path of the sequence file),
            ``output``, ``removeShortIS``, ``translateGenome`` and
            ``nthread``; the last four are forwarded unchanged to
            ``isPredict.isPredict``.
    """
    print('ISEScan starts at', datetime.datetime.now().ctime())

    seqfile = args['seqfile']
    seqfilename = os.path.basename(seqfile)
    # The name of the directory holding the sequence file is used as a prefix
    # for the list file name.
    org = os.path.basename(os.path.dirname(seqfile))
    filelist = org + '_' + seqfilename + '.list'
    with open(filelist, 'w') as fp:
        fp.write(seqfile + '\n')

    try:
        isPredict.isPredict(filelist, args['output'], args['removeShortIS'],
                            args['translateGenome'], args['nthread'])
    finally:
        # Always clean up the temporary list file, even when the prediction
        # fails, so aborted runs do not litter the working directory.
        os.remove(filelist)

    print('ISEScan ends at', datetime.datetime.now().ctime())
def buildArgParser(progVersion):
    """Build the command line parser for isescan.

    Args:
        progVersion: Version string reported by the ``--version`` option.

    Returns:
        An ``argparse.ArgumentParser`` defining ``--version``,
        ``--removeShortIS``, ``--no-FragGeneScan``, ``--seqfile`` (required),
        ``--output`` (defaults to ``'results'``) and ``--nthread``
        (defaults to 1).
    """
    import textwrap

    descriptStr = '''\
        ISEScan is a python pipeline to identify Insertion Sequence elements (both complete and incomplete IS elements) in genome. A typical invocation would be:
        python3 isescan.py --seqfile genome.fasta --output results
        - If you want isescan to report only complete IS elements, you need to set command line option --removeShortIS.'''
    parser = argparse.ArgumentParser(
        prog='isescan',
        description=textwrap.dedent(descriptStr),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--version', action='version',
                        version='%(prog)s' + ' ' + progVersion)
    parser.add_argument(
        '--removeShortIS',
        action='store_true',
        help="Remove incomplete (partial) IS elements which include IS element with length < 400 or single copy IS element without perfect TIR.",
    )
    # store_false: the attribute ``no_FragGeneScan`` is True unless the flag
    # is given, i.e. it holds "translate the genome with FragGeneScan".
    parser.add_argument(
        '--no-FragGeneScan',
        action='store_false',
        help="Use the annotated protein sequences in NCBI GenBank file (.gbk which must be in the same folder with genome sequence file), instead of the protein sequences predicted/translated by FragGeneScan. (Experimental feature!)",
    )
    # Required option: a default would never be used, so none is declared.
    parser.add_argument(
        '--seqfile',
        required=True,
        help="Sequence file in fasta format (required)",
    )
    # Optional so that the documented default can actually take effect.
    parser.add_argument(
        '--output',
        required=False,
        default='results',
        help="Output directory, 'results' by default",
    )
    parser.add_argument(
        '--nthread',
        required=False,
        type=int,
        default=1,
        help='Number of CPU cores used for FragGeneScan and hmmer, 1 by default.',
    )
    return parser


if __name__ == "__main__":
    # Parse command line arguments
    args = buildArgParser(version).parse_args()

    args4isPredictSingle = {
        'removeShortIS': args.removeShortIS,
        'translateGenome': args.no_FragGeneScan,
        'seqfile': args.seqfile.strip(),
        'output': args.output.strip(),
        'nthread': args.nthread,
    }

    isPredictSingle(args4isPredictSingle)