Package Bio :: Package Sequencing :: Package Applications :: Module _Novoalign
[hide private]
[frames | no frames]

Source Code for Module Bio.Sequencing.Applications._Novoalign

  1  # Copyright 2009 by Osvaldo Zagordi.  All rights reserved. 
  2  # Revisions copyright 2010 by Peter Cock. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """Command line wrapper for the short read aligner Novoalign by Novocraft.""" 
  7   
  8  from __future__ import print_function 
  9   
 10  from Bio.Application import _Option, AbstractCommandline 
 11   
 12   
class NovoalignCommandline(AbstractCommandline):
    """Command line wrapper for novoalign by Novocraft.

    See www.novocraft.com - novoalign is a short read alignment program.

    Example:

    >>> from Bio.Sequencing.Applications import NovoalignCommandline
    >>> novoalign_cline = NovoalignCommandline(database='some_db',
    ...                                        readfile='some_seq.txt')
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt

    As with all the Biopython application wrappers, you can also add or
    change options after creating the object:

    >>> novoalign_cline.format = 'PRBnSEQ'
    >>> novoalign_cline.r_method='0.99' # limited valid values
    >>> novoalign_cline.fragment = '250 20' # must be given as a string
    >>> novoalign_cline.miRNA = 100
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100

    You would typically run the command line with novoalign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Last checked against version: 2.05.04
    """

    def __init__(self, cmd="novoalign", **kwargs):
        """Declare the supported novoalign options and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable; defaults to "novoalign".
         - kwargs - initial option values, keyed by the Python-side option
           names declared below (for example ``database`` or ``readfile``).
           They are passed unchanged to ``AbstractCommandline.__init__``.

        Each ``_Option`` lists the command line switch first and the Python
        keyword/attribute name second.  The order of ``self.parameters`` is
        the order in which the options are declared to the base class, so it
        is kept exactly as it was.
        """
        READ_FORMAT = ['FA', 'SLXFQ', 'STDFQ', 'ILMFQ', 'PRB', 'PRBnSEQ']
        REPORT_FORMAT = ['Native', 'Pairwise', 'SAM']
        REPEAT_METHOD = ['None', 'Random', 'All', 'Exhaustive', '0.99']

        # Shared value checkers.  Each returns a real bool; they replace the
        # many identical lambdas that used to be repeated per option.
        def _is_int(value):
            return isinstance(value, int)

        def _is_str(value):
            return isinstance(value, str)

        def _is_read_format(value):
            return value in READ_FORMAT

        def _is_report_format(value):
            return value in REPORT_FORMAT

        def _is_repeat_method(value):
            # Only the first word names the method; 'All' and 'Exhaustive'
            # may be followed by a limit.  An empty or whitespace-only string
            # has no first word: report it as invalid instead of letting
            # ``[0]`` raise IndexError.
            words = value.split()
            return bool(words) and words[0] in REPEAT_METHOD

        def _is_fragment(value):
            # Expects a string of two whitespace-separated fields, e.g.
            # '250 20' (fragment length and standard deviation).
            return len(value.split()) == 2

        self.parameters = [
            _Option(["-d", "database"],
                    "database filename",
                    filename=True,
                    equate=False),
            _Option(["-f", "readfile"],
                    "read file",
                    filename=True,
                    equate=False),
            _Option(["-F", "format"],
                    "Format of read files.\n\nAllowed values: %s"
                    % ", ".join(READ_FORMAT),
                    checker_function=_is_read_format,
                    equate=False),

            # Alignment scoring options
            _Option(["-t", "threshold"],
                    "Threshold for alignment score",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-g", "gap_open"],
                    "Gap opening penalty [default: 40]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-x", "gap_extend"],
                    "Gap extend penalty [default: 15]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-u", "unconverted"],
                    "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
                    "Default: no penalty",
                    checker_function=_is_int,
                    equate=False),

            # Quality control and read filtering
            _Option(["-l", "good_bases"],
                    "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-h", "homopolymer"],
                    "Homopolymer read filter [default: 20; disable: negative value]",
                    checker_function=_is_int,
                    equate=False),

            # Read preprocessing options
            _Option(["-a", "adapter3"],
                    "Strips a 3' adapter sequence prior to alignment.\n\n"
                    "With paired ends two adapters can be specified",
                    checker_function=_is_str,
                    equate=False),
            _Option(["-n", "truncate"],
                    "Truncate to specific length before alignment",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-s", "trimming"],
                    "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
                    "Default: 2",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-5", "adapter5"],
                    "Strips a 5' adapter sequence.\n\n"
                    "Similar to -a (adapter3), but on the 5' end.",
                    checker_function=_is_str,
                    equate=False),

            # Reporting options
            _Option(["-o", "report"],
                    "Specifies the report format.\n\nAllowed values: %s\nDefault: Native"
                    % ", ".join(REPORT_FORMAT),
                    checker_function=_is_report_format,
                    equate=False),
            _Option(["-Q", "quality"],
                    "Lower threshold for an alignment to be reported [default: 0]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-R", "repeats"],
                    "If score difference is higher, report repeats.\n\n"
                    "Otherwise -r read method applies [default: 5]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-r", "r_method"],
                    "Methods to report reads with multiple matches.\n\n"
                    "Allowed values: %s\n"
                    "'All' and 'Exhaustive' accept limits."
                    % ", ".join(REPEAT_METHOD),
                    checker_function=_is_repeat_method,
                    equate=False),
            _Option(["-e", "recorded"],
                    "Alignments recorded with score equal to the best.\n\n"
                    "Default: 1000 in default read method, otherwise no limit.",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-q", "qual_digits"],
                    "Decimal digits for quality scores [default: 0]",
                    checker_function=_is_int,
                    equate=False),

            # Paired end options
            _Option(["-i", "fragment"],
                    "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
                    checker_function=_is_fragment,
                    equate=False),
            _Option(["-v", "variation"],
                    "Structural variation penalty [default: 70]",
                    checker_function=_is_int,
                    equate=False),

            # miRNA mode
            _Option(["-m", "miRNA"],
                    "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
                    checker_function=_is_int,
                    equate=False),

            # Multithreading
            _Option(["-c", "cores"],
                    "Number of threads, disabled on free versions [default: number of cores]",
                    checker_function=_is_int,
                    equate=False),

            # Quality calibrations
            _Option(["-k", "read_cal"],
                    "Read quality calibration from file (mismatch counts)",
                    checker_function=_is_str,
                    equate=False),
            _Option(["-K", "write_cal"],
                    "Accumulate mismatch counts and write to file",
                    checker_function=_is_str,
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)


179 -def _test():
180 """Run the module's doctests (PRIVATE).""" 181 print("Running Novoalign doctests...") 182 import doctest 183 doctest.testmod() 184 print("Done")


# Run the module's doctests when this file is executed directly as a script.
if __name__ == "__main__":
    _test()