Package Bio :: Package Sequencing :: Package Applications :: Module _Novoalign
[hide private]
[frames | no frames]

Source Code for Module Bio.Sequencing.Applications._Novoalign

  1  # Copyright 2009 by Osvaldo Zagordi.  All rights reserved. 
  2  # Revisions copyright 2010 by Peter Cock. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """Command line wrapper for the short read aligner Novoalign by Novocraft.""" 
  7  import types 
  8  from Bio.Application import _Option, AbstractCommandline 
  9   
 10   
class NovoalignCommandline(AbstractCommandline):
    """Command line wrapper for novoalign by Novocraft.

    See www.novocraft.com - novoalign is a short read alignment program.

    Example:

    >>> from Bio.Sequencing.Applications import NovoalignCommandline
    >>> novoalign_cline = NovoalignCommandline(database='some_db',
    ...                                        readfile='some_seq.txt')
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt

    As with all the Biopython application wrappers, you can also add or
    change options after creating the object:

    >>> novoalign_cline.format = 'PRBnSEQ'
    >>> novoalign_cline.r_method='0.99' # limited valid values
    >>> novoalign_cline.fragment = '250 20' # must be given as a string
    >>> novoalign_cline.miRNA = 100
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100

    You would typically run the command line with novoalign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Last checked against version: 2.05.04
    """

    def __init__(self, cmd="novoalign", **kwargs):
        """Declare the supported novoalign options and initialise the wrapper.

        Arguments:
         - cmd - first element of the command line (default "novoalign").
         - kwargs - initial option values keyed by option name (for example
           database='some_db'); forwarded unchanged to
           AbstractCommandline.__init__.
        """
        READ_FORMAT = ['FA', 'SLXFQ', 'STDFQ', 'ILMFQ', 'PRB', 'PRBnSEQ']
        REPORT_FORMAT = ['Native', 'Pairwise', 'SAM']
        REPEAT_METHOD = ['None', 'Random', 'All', 'Exhaustive', '0.99']

        # The builtins int/str are the very same objects as the Python 2-only
        # aliases types.IntType/types.StringType, but also exist on Python 3.
        def is_int(value):
            return isinstance(value, int)

        def is_str(value):
            return isinstance(value, str)

        # Every option is declared with equate=False, i.e. rendered as
        # "-flag value" (see the class doctest) rather than "-flag=value".
        self.parameters = \
           [
            _Option(["-d", "database"],
                    "database filename",
                    filename=True,
                    equate=False),
            _Option(["-f", "readfile"],
                    "read file",
                    filename=True,
                    equate=False),
            _Option(["-F", "format"],
                    "Format of read files.\n\nAllowed values: %s"
                    % ", ".join(READ_FORMAT),
                    checker_function=lambda x: x in READ_FORMAT,
                    equate=False),

            # Alignment scoring options
            _Option(["-t", "threshold"],
                    "Threshold for alignment score",
                    checker_function=is_int,
                    equate=False),
            _Option(["-g", "gap_open"],
                    "Gap opening penalty [default: 40]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-x", "gap_extend"],
                    "Gap extend penalty [default: 15]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-u", "unconverted"],
                    "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
                    "Default: no penalty",
                    checker_function=is_int,
                    equate=False),

            # Quality control and read filtering
            _Option(["-l", "good_bases"],
                    "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-h", "homopolymer"],
                    "Homopolymer read filter [default: 20; disable: negative value]",
                    checker_function=is_int,
                    equate=False),

            # Read preprocessing options
            _Option(["-a", "adapter3"],
                    "Strips a 3' adapter sequence prior to alignment.\n\n"
                    "With paired ends two adapters can be specified",
                    checker_function=is_str,
                    equate=False),
            _Option(["-n", "truncate"],
                    "Truncate to specific length before alignment",
                    checker_function=is_int,
                    equate=False),
            _Option(["-s", "trimming"],
                    "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
                    "Default: 2",
                    checker_function=is_int,
                    equate=False),
            _Option(["-5", "adapter5"],
                    "Strips a 5' adapter sequence.\n\n"
                    "Similar to -a (adaptor3), but on the 5' end.",
                    checker_function=is_str,
                    equate=False),

            # Reporting options
            _Option(["-o", "report"],
                    "Specifies the report format.\n\nAllowed values: %s\nDefault: Native"
                    % ", ".join(REPORT_FORMAT),
                    checker_function=lambda x: x in REPORT_FORMAT,
                    equate=False),
            _Option(["-Q", "quality"],
                    "Lower threshold for an alignment to be reported [default: 0]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-R", "repeats"],
                    "If score difference is higher, report repeats.\n\n"
                    "Otherwise -r read method applies [default: 5]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-r", "r_method"],
                    "Methods to report reads with multiple matches.\n\n"
                    "Allowed values: %s\n"
                    "'All' and 'Exhaustive' accept limits."
                    % ", ".join(REPEAT_METHOD),
                    # Only the first whitespace-separated word is validated,
                    # so a trailing limit (e.g. 'All 10') is accepted.
                    checker_function=lambda x: x.split()[0] in REPEAT_METHOD,
                    equate=False),
            _Option(["-e", "recorded"],
                    "Alignments recorded with score equal to the best.\n\n"
                    "Default: 1000 in default read method, otherwise no limit.",
                    checker_function=is_int,
                    equate=False),
            _Option(["-q", "qual_digits"],
                    "Decimal digits for quality scores [default: 0]",
                    checker_function=is_int,
                    equate=False),

            # Paired end options
            _Option(["-i", "fragment"],
                    "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
                    # Must be one string holding two whitespace-separated
                    # fields, e.g. '250 20'.
                    checker_function=lambda x: len(x.split()) == 2,
                    equate=False),
            _Option(["-v", "variation"],
                    "Structural variation penalty [default: 70]",
                    checker_function=is_int,
                    equate=False),

            # miRNA mode
            _Option(["-m", "miRNA"],
                    "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
                    checker_function=is_int,
                    equate=False),

            # Multithreading
            _Option(["-c", "cores"],
                    "Number of threads, disabled on free versions [default: number of cores]",
                    checker_function=is_int,
                    equate=False),

            # Quality calibrations
            _Option(["-k", "read_cal"],
                    "Read quality calibration from file (mismatch counts)",
                    checker_function=is_str,
                    equate=False),
            _Option(["-K", "write_cal"],
                    "Accumulate mismatch counts and write to file",
                    checker_function=is_str,
                    equate=False),
           ]
        # self.parameters is assigned before delegating to the base class,
        # which receives the caller's keyword arguments unchanged.
        AbstractCommandline.__init__(self, cmd, **kwargs)
175 176
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a banner, runs doctest.testmod() with its default arguments,
    then prints "Done".
    """
    # Single-argument print(...) produces identical output whether it is
    # parsed as the Python 2 print statement or the Python 3 function.
    print("Running Novoalign doctests...")
    import doctest
    doctest.testmod()
    print("Done")


# Run the doctests only when this file is executed as a script.
if __name__ == "__main__":
    _test()