Package Bio :: Package Align :: Package Applications :: Module _Dialign
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._Dialign

  1  # Copyright 2009 by Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Command line wrapper for the multiple alignment program DIALIGN2-2. 
  6  """ 
  7   
  8  __docformat__ = "epytext en"  # Don't just use plain text in epydoc API pages! 
  9   
 10  from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline 
 11   
 12   
class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

    Example:

    To align a FASTA file (unaligned.fasta) with the output files named
    aligned.* including a FASTA output file (aligned.fa), use:

    >>> from Bio.Align.Applications import DialignCommandline
    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
    ...                                    fn="aligned", fa=True)
    >>> print(dialign_cline)
    dialign2-2 -fa -fn aligned unaligned.fasta

    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

    Last checked against version: 2.2
    """

    def __init__(self, cmd="dialign2-2", **kwargs):
        """Declare the DIALIGN parameters and initialise the base class.

        @param cmd: name (or path) of the executable; stored as
            C{self.program_name} and passed on to
            C{AbstractCommandline.__init__}.
        @param kwargs: forwarded unchanged to
            C{AbstractCommandline.__init__}; the example in the class
            docstring passes parameter values this way (C{input=...},
            C{fn=...}, C{fa=True}).
        """
        self.program_name = cmd
        # Each entry gives [command line flag, Python keyword name] followed
        # by its help text.  The options below pass equate=False; the class
        # docstring example shows "-fn aligned", i.e. flag and value
        # separated by a space.
        self.parameters = [
            _Switch(["-afc", "afc"],
                    "Creates additional output file '*.afc' "
                    "containing data of all fragments considered "
                    "for alignment WARNING: this file can be HUGE !"),
            _Switch(["-afc_v", "afc_v"],
                    "Like '-afc' but verbose: fragments are explicitly "
                    "printed. WARNING: this file can be EVEN BIGGER !"),
            _Switch(["-anc", "anc"],
                    "Anchored alignment. Requires a file <seq_file>.anc "
                    "containing anchor points."),
            _Switch(["-cs", "cs"],
                    "If segments are translated, not only the `Watson "
                    "strand' but also the `Crick strand' is looked at."),
            _Switch(["-cw", "cw"],
                    "Additional output file in CLUSTAL W format."),
            _Switch(["-ds", "ds"],
                    "`dna alignment speed up' - non-translated nucleic acid "
                    "fragments are taken into account only if they start "
                    "with at least two matches. Speeds up DNA alignment at "
                    "the expense of sensitivity."),
            _Switch(["-fa", "fa"],
                    "Additional output file in FASTA format."),
            _Switch(["-ff", "ff"],
                    "Creates file *.frg containing information about all "
                    "fragments that are part of the respective optimal "
                    "pairwise alignmnets plus information about "
                    "consistency in the multiple alignment"),
            _Option(["-fn", "fn"],
                    "Output files are named <out_file>.<extension>.",
                    equate=False),
            _Switch(["-fop", "fop"],
                    "Creates file *.fop containing coordinates of all "
                    "fragments that are part of the respective pairwise alignments."),
            _Switch(["-fsm", "fsm"],
                    "Creates file *.fsm containing coordinates of all "
                    "fragments that are part of the final alignment"),
            _Switch(["-iw", "iw"],
                    "Overlap weights switched off (by default, overlap "
                    "weights are used if up to 35 sequences are aligned). "
                    "This option speeds up the alignment but may lead "
                    "to reduced alignment quality."),
            _Switch(["-lgs", "lgs"],
                    "`long genomic sequences' - combines the following "
                    "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                    "-fop, -ff, -cs, -ds, -pst "),
            _Switch(["-lgs_t", "lgs_t"],
                    "Like '-lgs' but with all segment pairs assessed "
                    "at the peptide level (rather than 'mixed alignments' "
                    "as with the '-lgs' option). Therefore faster than "
                    "-lgs but not very sensitive for non-coding regions."),
            _Option(["-lmax", "lmax"],
                    "Maximum fragment length = x (default: x = 40 or "
                    "x = 120 for `translated' fragments). Shorter x "
                    "speeds up the program but may affect alignment quality.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-lo", "lo"],
                    "(Long Output) Additional file *.log with information "
                    "about fragments selected for pairwise alignment and "
                    "about consistency in multi-alignment proceedure."),
            _Switch(["-ma", "ma"],
                    "`mixed alignments' consisting of P-fragments and "
                    "N-fragments if nucleic acid sequences are aligned."),
            _Switch(["-mask", "mask"],
                    "Residues not belonging to selected fragments are "
                    "replaced by `*' characters in output alignment "
                    "(rather than being printed in lower-case characters)"),
            _Switch(["-mat", "mat"],
                    "Creates file *mat with substitution counts derived "
                    "from the fragments that have been selected for alignment."),
            # NOTE(review): the help text mentions a threshold "t", yet this
            # is declared as a value-less switch -- confirm against the
            # DIALIGN documentation before changing (callers may pass True).
            _Switch(["-mat_thr", "mat_thr"],
                    "Like '-mat' but only fragments with weight score "
                    "> t are considered"),
            _Switch(["-max_link", "max_link"],
                    "'maximum linkage' clustering used to construct "
                    "sequence tree (instead of UPGMA)."),
            _Switch(["-min_link", "min_link"],
                    "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"],
                    "'motif' option.",
                    equate=False),
            _Switch(["-msf", "msf"],
                    "Separate output file in MSF format."),
            _Switch(["-n", "n"],
                    "Input sequences are nucleic acid sequences. "
                    "No translation of fragments."),
            _Switch(["-nt", "nt"],
                    "Input sequences are nucleic acid sequences and "
                    "`nucleic acid segments' are translated to `peptide "
                    "segments'."),
            _Switch(["-nta", "nta"],
                    "`no textual alignment' - textual alignment suppressed. "
                    "This option makes sense if other output files are of "
                    "intrest -- e.g. the fragment files created with -ff, "
                    "-fop, -fsm or -lo."),
            _Switch(["-o", "o"],
                    "Fast version, resulting alignments may be slightly "
                    "different."),
            _Switch(["-ow", "ow"],
                    "Overlap weights enforced (By default, overlap weights "
                    "are used only if up to 35 sequences are aligned since "
                    "calculating overlap weights is time consuming)."),
            _Switch(["-pst", "pst"],
                    "'print status'. Creates and updates a file *.sta with "
                    "information about the current status of the program "
                    "run. This option is recommended if large data sets "
                    "are aligned since it allows the user to estimate the "
                    "remaining running time."),
            # NOTE(review): described as a "minimum similarity value" (and
            # the -lgs help text lists "-smin 8"), yet declared as a
            # value-less switch -- confirm against the DIALIGN documentation.
            _Switch(["-smin", "smin"],
                    "Minimum similarity value for first residue pair "
                    "(or codon pair) in fragments. Speeds up protein "
                    "alignment or alignment of translated DNA fragments "
                    "at the expense of sensitivity."),
            _Option(["-stars", "stars"],
                    "Maximum number of `*' characters indicating degree "
                    "of local similarity among sequences. By default, no "
                    "stars are used but numbers between 0 and 9, instead.",
                    # Accepts only values equal to one of 0..9.
                    checker_function=lambda x: x in range(0, 10),
                    equate=False),
            _Switch(["-stdo", "stdo"],
                    "Results written to standard output."),
            _Switch(["-ta", "ta"],
                    "Standard textual alignment printed (overrides "
                    "suppression of textual alignments in special "
                    "options, e.g. -lgs)"),
            _Option(["-thr", "thr"],
                    "Threshold T = x.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-xfr", "xfr"],
                    "'exclude fragments' - list of fragments can be "
                    "specified that are NOT considered for pairwise alignment"),
            _Argument(["input"],
                      "Input file name. Must be FASTA format",
                      filename=True,
                      is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
181 182
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a start banner, calls ``doctest.testmod()`` with its default
    arguments, then prints a completion message.  Returns None.
    """
    # A single parenthesised argument parses on both Python 2 (print
    # statement with a parenthesised expression) and Python 3 (function
    # call), and produces the same output on each.
    print("Running modules doctests...")
    import doctest
    doctest.testmod()
    print("Done")


# Run the doctests only when this file is executed as a script.
if __name__ == "__main__":
    _test()