1
2
3
4
5
6 """Command line wrapper for the short read aligner Novoalign by Novocraft."""
7 import types
8 from Bio.Application import _Option, AbstractCommandline
9
10
12 """Command line wrapper for novoalign by Novocraft.
13
14 See www.novocraft.com - novoalign is a short read alignment program.
15
16 Example:
17
18 >>> from Bio.Sequencing.Applications import NovoalignCommandline
19 >>> novoalign_cline = NovoalignCommandline(database='some_db',
20 ... readfile='some_seq.txt')
21 >>> print novoalign_cline
22 novoalign -d some_db -f some_seq.txt
23
24 As with all the Biopython application wrappers, you can also add or
25 change options after creating the object:
26
27 >>> novoalign_cline.format = 'PRBnSEQ'
28 >>> novoalign_cline.r_method='0.99' # limited valid values
29 >>> novoalign_cline.fragment = '250 20' # must be given as a string
30 >>> novoalign_cline.miRNA = 100
31 >>> print novoalign_cline
32 novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100
33
34 You would typically run the command line with novoalign_cline() or via
35 the Python subprocess module, as described in the Biopython tutorial.
36
37 Last checked against version: 2.05.04
38 """
def __init__(self, cmd="novoalign", **kwargs):
    """Declare the novoalign options and initialise the wrapper.

    Arguments:
     - cmd - name or path of the executable to run (default "novoalign").
     - kwargs - initial option values keyed by the Python-side option
       names declared below (e.g. database, readfile). They are
       forwarded unchanged to AbstractCommandline.__init__.

    Every option is declared with equate=False and carries a help text;
    all but the two filename options also carry a checker function that
    returns True for acceptable values.
    """
    READ_FORMAT = ['FA', 'SLXFQ', 'STDFQ', 'ILMFQ', 'PRB', 'PRBnSEQ']
    REPORT_FORMAT = ['Native', 'Pairwise', 'SAM']
    REPEAT_METHOD = ['None', 'Random', 'All', 'Exhaustive', '0.99']

    # Shared value checkers.  The builtins int/str are the very objects
    # that types.IntType/types.StringType alias on Python 2, and unlike
    # those aliases they still exist on Python 3.
    def is_int(value):
        """Return True if value is an int."""
        return isinstance(value, int)

    def is_str(value):
        """Return True if value is a str."""
        return isinstance(value, str)

    def is_repeat_method(value):
        """Return True if the first word of value is a known method.

        Only the first word is checked because 'All' and 'Exhaustive'
        may be followed by a limit.  An empty or whitespace-only string
        yields False rather than an IndexError.
        """
        words = value.split()
        return bool(words) and words[0] in REPEAT_METHOD

    self.parameters = [
        # Input files and read format
        _Option(["-d", "database"],
                "database filename",
                filename=True,
                equate=False),
        _Option(["-f", "readfile"],
                "read file",
                filename=True,
                equate=False),
        _Option(["-F", "format"],
                "Format of read files.\n\nAllowed values: %s"
                % ", ".join(READ_FORMAT),
                checker_function=lambda x: x in READ_FORMAT,
                equate=False),

        # Alignment score threshold and penalties
        _Option(["-t", "threshold"],
                "Threshold for alignment score",
                checker_function=is_int,
                equate=False),
        _Option(["-g", "gap_open"],
                "Gap opening penalty [default: 40]",
                checker_function=is_int,
                equate=False),
        _Option(["-x", "gap_extend"],
                "Gap extend penalty [default: 15]",
                checker_function=is_int,
                equate=False),
        _Option(["-u", "unconverted"],
                "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
                "Default: no penalty",
                checker_function=is_int,
                equate=False),

        # Read quality filters
        _Option(["-l", "good_bases"],
                "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
                checker_function=is_int,
                equate=False),
        _Option(["-h", "homopolymer"],
                "Homopolymer read filter [default: 20; disable: negative value]",
                checker_function=is_int,
                equate=False),

        # Adapter stripping, truncation and trimming
        _Option(["-a", "adapter3"],
                "Strips a 3' adapter sequence prior to alignment.\n\n"
                "With paired ends two adapters can be specified",
                checker_function=is_str,
                equate=False),
        _Option(["-n", "truncate"],
                "Truncate to specific length before alignment",
                checker_function=is_int,
                equate=False),
        _Option(["-s", "trimming"],
                "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
                "Default: 2",
                checker_function=is_int,
                equate=False),
        _Option(["-5", "adapter5"],
                "Strips a 5' adapter sequence.\n\n"
                "Similar to -a (adapter3), but on the 5' end.",
                checker_function=is_str,
                equate=False),

        # Reporting
        _Option(["-o", "report"],
                "Specifies the report format.\n\nAllowed values: %s\nDefault: Native"
                % ", ".join(REPORT_FORMAT),
                checker_function=lambda x: x in REPORT_FORMAT,
                equate=False),
        _Option(["-Q", "quality"],
                "Lower threshold for an alignment to be reported [default: 0]",
                checker_function=is_int,
                equate=False),
        _Option(["-R", "repeats"],
                "If score difference is higher, report repeats.\n\n"
                "Otherwise -r read method applies [default: 5]",
                checker_function=is_int,
                equate=False),
        _Option(["-r", "r_method"],
                "Methods to report reads with multiple matches.\n\n"
                "Allowed values: %s\n"
                "'All' and 'Exhaustive' accept limits."
                % ", ".join(REPEAT_METHOD),
                checker_function=is_repeat_method,
                equate=False),
        _Option(["-e", "recorded"],
                "Alignments recorded with score equal to the best.\n\n"
                "Default: 1000 in default read method, otherwise no limit.",
                checker_function=is_int,
                equate=False),
        _Option(["-q", "qual_digits"],
                "Decimal digits for quality scores [default: 0]",
                checker_function=is_int,
                equate=False),

        # Fragment length and structural variation
        _Option(["-i", "fragment"],
                "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
                # Value must be one string holding exactly two words.
                checker_function=lambda x: len(x.split()) == 2,
                equate=False),
        _Option(["-v", "variation"],
                "Structural variation penalty [default: 70]",
                checker_function=is_int,
                equate=False),

        # miRNA mode
        _Option(["-m", "miRNA"],
                "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
                checker_function=is_int,
                equate=False),

        # Threads
        _Option(["-c", "cores"],
                "Number of threads, disabled on free versions [default: number of cores]",
                checker_function=is_int,
                equate=False),

        # Quality calibration files
        _Option(["-k", "read_cal"],
                "Read quality calibration from file (mismatch counts)",
                checker_function=is_str,
                equate=False),
        _Option(["-K", "write_cal"],
                "Accumulate mismatch counts and write to file",
                checker_function=is_str,
                equate=False),
    ]
    # self.parameters is assigned before the base initialiser runs, as in
    # the original statement order.
    AbstractCommandline.__init__(self, cmd, **kwargs)
175
176
def _test():
    """Run the module's doctests (PRIVATE)."""
    # Single-argument print(...) gives the same output under Python 2's
    # print statement and Python 3's print function.
    print("Running Novoalign doctests...")
    # Imported here so doctest is only loaded when the tests are run.
    import doctest
    doctest.testmod()
    print("Done")
183
# Script entry point: run the doctests only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    _test()
186