Package Bio :: Package Align :: Package Applications :: Module _Clustalw
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._Clustalw

  1  # Copyright 2009 by Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Command line wrapper for the multiple alignment program Clustal W. 
  6  """ 
  7   
  8  __docformat__ = "epytext en"  # Don't just use plain text in epydoc API pages! 
  9   
 10  import os 
 11  from Bio.Application import _Option, _Switch, AbstractCommandline 
 12   
 13   
class ClustalwCommandline(AbstractCommandline):
    """Command line wrapper for clustalw (version one or two).

    http://www.clustal.org/

    Example:

    >>> from Bio.Align.Applications import ClustalwCommandline
    >>> in_file = "unaligned.fasta"
    >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
    >>> print(clustalw_cline)
    clustalw2 -infile=unaligned.fasta

    You would typically run the command line with clustalw_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
    Bioinformatics, 23, 2947-2948.

    Last checked against versions: 1.83 and 2.1
    """
    #TODO - Should we default to cmd="clustalw2" now?

    def __init__(self, cmd="clustalw", **kwargs):
        """Declare the supported ClustalW arguments and initialise the wrapper.

        cmd is the name (or path) of the executable to call and defaults to
        "clustalw".  Any keyword arguments are passed unchanged, together
        with cmd, to AbstractCommandline.__init__ once self.parameters has
        been filled in.

        Every argument is registered under four aliases: the dashed lower
        and upper case spellings, and the undashed upper and lower case
        spellings.
        """
        self.parameters = [
            _Option(["-infile", "-INFILE", "INFILE", "infile"],
                    "Input sequences.",
                    filename=True),
            _Option(["-profile1", "-PROFILE1", "PROFILE1", "profile1"],
                    "Profiles (old alignment).",
                    filename=True),
            _Option(["-profile2", "-PROFILE2", "PROFILE2", "profile2"],
                    "Profiles (old alignment).",
                    filename=True),
            ################## VERBS (do things) #############################
            _Switch(["-options", "-OPTIONS", "OPTIONS", "options"],
                    "List the command line parameters"),
            _Switch(["-help", "-HELP", "HELP", "help"],
                    "Outline the command line params."),
            _Switch(["-check", "-CHECK", "CHECK", "check"],
                    "Outline the command line params."),
            _Switch(["-fullhelp", "-FULLHELP", "FULLHELP", "fullhelp"],
                    "Output full help content."),
            _Switch(["-align", "-ALIGN", "ALIGN", "align"],
                    "Do full multiple alignment."),
            _Switch(["-tree", "-TREE", "TREE", "tree"],
                    "Calculate NJ tree."),
            _Switch(["-pim", "-PIM", "PIM", "pim"],
                    "Output percent identity matrix (while calculating the tree)."),
            _Option(["-bootstrap", "-BOOTSTRAP", "BOOTSTRAP", "bootstrap"],
                    "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
                    checker_function=lambda x: isinstance(x, int)),
            _Switch(["-convert", "-CONVERT", "CONVERT", "convert"],
                    "Output the input sequences in a different file format."),
            ##################### PARAMETERS (set things) #########################
            # ***General settings:****
            # -interactive ("read command line, then enter normal interactive
            # menus") is deliberately not wrapped: it makes no sense in
            # Biopython.
            _Switch(["-quicktree", "-QUICKTREE", "QUICKTREE", "quicktree"],
                    "Use FAST algorithm for the alignment guide tree"),
            _Option(["-type", "-TYPE", "TYPE", "type"],
                    "PROTEIN or DNA sequences",
                    checker_function=lambda x: x in ["PROTEIN", "DNA",
                                                     "protein", "dna"]),
            _Switch(["-negative", "-NEGATIVE", "NEGATIVE", "negative"],
                    "Protein alignment with negative values in matrix"),
            _Option(["-outfile", "-OUTFILE", "OUTFILE", "outfile"],
                    "Output sequence alignment file name",
                    filename=True),
            _Option(["-output", "-OUTPUT", "OUTPUT", "output"],
                    "Output format: CLUSTAL(default), GCG, GDE, PHYLIP, PIR, NEXUS and FASTA",
                    checker_function=lambda x: x in ["CLUSTAL", "GCG", "GDE", "PHYLIP",
                                                     "PIR", "NEXUS", "FASTA",
                                                     "clustal", "gcg", "gde", "phylip",
                                                     "pir", "nexus", "fasta"]),
            _Option(["-outorder", "-OUTORDER", "OUTORDER", "outorder"],
                    "Output taxon order: INPUT or ALIGNED",
                    checker_function=lambda x: x in ["INPUT", "input",
                                                     "ALIGNED", "aligned"]),
            _Option(["-case", "-CASE", "CASE", "case"],
                    "LOWER or UPPER (for GDE output only)",
                    checker_function=lambda x: x in ["UPPER", "upper",
                                                     "LOWER", "lower"]),
            _Option(["-seqnos", "-SEQNOS", "SEQNOS", "seqnos"],
                    "OFF or ON (for Clustal output only)",
                    checker_function=lambda x: x in ["ON", "on",
                                                     "OFF", "off"]),
            _Option(["-seqno_range", "-SEQNO_RANGE", "SEQNO_RANGE", "seqno_range"],
                    "OFF or ON (NEW- for all output formats)",
                    checker_function=lambda x: x in ["ON", "on",
                                                     "OFF", "off"]),
            _Option(["-range", "-RANGE", "RANGE", "range"],
                    "Sequence range to write starting m to m+n. "
                    "Input as string eg. '24,200'"),
            _Option(["-maxseqlen", "-MAXSEQLEN", "MAXSEQLEN", "maxseqlen"],
                    "Maximum allowed input sequence length",
                    checker_function=lambda x: isinstance(x, int)),
            _Switch(["-quiet", "-QUIET", "QUIET", "quiet"],
                    "Reduce console output to minimum"),
            _Option(["-stats", "-STATS", "STATS", "stats"],
                    "Log some alignment statistics to file",
                    filename=True),
            # ***Fast Pairwise Alignments:***
            _Option(["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"],
                    "Word size",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"],
                    "Number of best diags.",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-window", "-WINDOW", "WINDOW", "window"],
                    "Window around best diags.",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"],
                    "Gap penalty",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-score", "-SCORE", "SCORE", "score"],
                    "Either: PERCENT or ABSOLUTE",
                    checker_function=lambda x: x in ["percent", "PERCENT",
                                                     "absolute", "ABSOLUTE"]),
            # ***Slow Pairwise Alignments:***
            _Option(["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"],
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                    checker_function=lambda x: x in ["BLOSUM", "PAM",
                                                     "GONNET", "ID",
                                                     "blosum", "pam",
                                                     "gonnet", "id"] or
                                               os.path.exists(x),
                    filename=True),
            _Option(["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX", "pwdnamatrix"],
                    "DNA weight matrix=IUB, CLUSTALW or filename",
                    checker_function=lambda x: x in ["IUB", "CLUSTALW",
                                                     "iub", "clustalw"] or
                                               os.path.exists(x),
                    filename=True),
            _Option(["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"],
                    "Gap opening penalty",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"],
                    "Gap extension penalty",
                    checker_function=lambda x: isinstance(x, (int, float))),
            # ***Multiple Alignments:***
            _Option(["-newtree", "-NEWTREE", "NEWTREE", "newtree"],
                    "Output file name for newly created guide tree",
                    filename=True),
            # The checker must be os.path.exists itself.  A lambda that merely
            # returns the function object (without calling it) is always
            # truthy and would accept every value, existing file or not.
            _Option(["-usetree", "-USETREE", "USETREE", "usetree"],
                    "File name of guide tree",
                    checker_function=os.path.exists,
                    filename=True),
            _Option(["-matrix", "-MATRIX", "MATRIX", "matrix"],
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                    checker_function=lambda x: x in ["BLOSUM", "PAM",
                                                     "GONNET", "ID",
                                                     "blosum", "pam",
                                                     "gonnet", "id"] or
                                               os.path.exists(x),
                    filename=True),
            _Option(["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"],
                    "DNA weight matrix=IUB, CLUSTALW or filename",
                    checker_function=lambda x: x in ["IUB", "CLUSTALW",
                                                     "iub", "clustalw"] or
                                               os.path.exists(x),
                    filename=True),
            _Option(["-gapopen", "-GAPOPEN", "GAPOPEN", "gapopen"],
                    "Gap opening penalty",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-gapext", "-GAPEXT", "GAPEXT", "gapext"],
                    "Gap extension penalty",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Switch(["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"],
                    "No end gap separation pen."),
            _Option(["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"],
                    "Gap separation pen. range",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Switch(["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"],
                    "Residue-specific gaps off"),
            _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"],
                    "Hydrophilic gaps off"),
            _Switch(["-hgapresidues", "-HGAPRESIDUES", "HGAPRESIDUES", "hgapresidues"],
                    "List hydrophilic res."),
            _Option(["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"],
                    "% ident. for delay",
                    checker_function=lambda x: isinstance(x, (int, float))),
            # -type is already handled in the General Settings section, but
            # appears a second time under Multiple Alignments in the help.
            _Option(["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT", "transweight"],
                    "Transitions weighting",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-iteration", "-ITERATION", "ITERATION", "iteration"],
                    "NONE or TREE or ALIGNMENT",
                    checker_function=lambda x: x in ["NONE", "TREE",
                                                     "ALIGNMENT",
                                                     "none", "tree",
                                                     "alignment"]),
            _Option(["-numiter", "-NUMITER", "NUMITER", "numiter"],
                    "maximum number of iterations to perform",
                    checker_function=lambda x: isinstance(x, int)),
            _Switch(["-noweights", "-NOWEIGHTS", "NOWEIGHTS", "noweights"],
                    "Disable sequence weighting"),
            # ***Profile Alignments:***
            _Switch(["-profile", "-PROFILE", "PROFILE", "profile"],
                    "Merge two alignments by profile alignment"),
            _Option(["-newtree1", "-NEWTREE1", "NEWTREE1", "newtree1"],
                    "Output file name for new guide tree of profile1",
                    filename=True),
            _Option(["-newtree2", "-NEWTREE2", "NEWTREE2", "newtree2"],
                    "Output file for new guide tree of profile2",
                    filename=True),
            _Option(["-usetree1", "-USETREE1", "USETREE1", "usetree1"],
                    "File name of guide tree for profile1",
                    checker_function=os.path.exists,
                    filename=True),
            _Option(["-usetree2", "-USETREE2", "USETREE2", "usetree2"],
                    "File name of guide tree for profile2",
                    checker_function=os.path.exists,
                    filename=True),
            # ***Sequence to Profile Alignments:***
            _Switch(["-sequences", "-SEQUENCES", "SEQUENCES", "sequences"],
                    "Sequentially add profile2 sequences to profile1 alignment"),
            # -newtree and -usetree are already handled in the Multiple
            # Alignments section, but appear a second time here in the help.
            # ***Structure Alignments:***
            _Switch(["-nosecstr1", "-NOSECSTR1", "NOSECSTR1", "nosecstr1"],
                    "Do not use secondary structure-gap penalty mask for profile 1"),
            _Switch(["-nosecstr2", "-NOSECSTR2", "NOSECSTR2", "nosecstr2"],
                    "Do not use secondary structure-gap penalty mask for profile 2"),
            _Option(["-secstrout", "-SECSTROUT", "SECSTROUT", "secstrout"],
                    "STRUCTURE or MASK or BOTH or NONE output in alignment file",
                    checker_function=lambda x: x in ["STRUCTURE", "MASK",
                                                     "BOTH", "NONE",
                                                     "structure", "mask",
                                                     "both", "none"]),
            _Option(["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"],
                    "Gap penalty for helix core residues",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"],
                    "gap penalty for strand core residues",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"],
                    "Gap penalty for loop regions",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-terminalgap", "-TERMINALGAP", "TERMINALGAP", "terminalgap"],
                    "Gap penalty for structure termini",
                    checker_function=lambda x: isinstance(x, (int, float))),
            _Option(["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"],
                    "Number of residues inside helix to be treated as terminal",
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-helixendout", "-HELIXENDOUT", "HELIXENDOUT", "helixendout"],
                    "Number of residues outside helix to be treated as terminal",
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-strandendin", "-STRANDENDIN", "STRANDENDIN", "strandendin"],
                    "Number of residues inside strand to be treated as terminal",
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-strandendout", "-STRANDENDOUT", "STRANDENDOUT", "strandendout"],
                    "Number of residues outside strand to be treated as terminal",
                    checker_function=lambda x: isinstance(x, int)),
            # ***Trees:***
            _Option(["-outputtree", "-OUTPUTTREE", "OUTPUTTREE", "outputtree"],
                    "nj OR phylip OR dist OR nexus",
                    checker_function=lambda x: x in ["NJ", "PHYLIP",
                                                     "DIST", "NEXUS",
                                                     "nj", "phylip",
                                                     "dist", "nexus"]),
            _Option(["-seed", "-SEED", "SEED", "seed"],
                    "Seed number for bootstraps.",
                    checker_function=lambda x: isinstance(x, int)),
            _Switch(["-kimura", "-KIMURA", "KIMURA", "kimura"],
                    "Use Kimura's correction."),
            _Switch(["-tossgaps", "-TOSSGAPS", "TOSSGAPS", "tossgaps"],
                    "Ignore positions with gaps."),
            _Option(["-bootlabels", "-BOOTLABELS", "BOOTLABELS", "bootlabels"],
                    "Node OR branch position of bootstrap values in tree display",
                    checker_function=lambda x: x in ["NODE", "BRANCH",
                                                     "node", "branch"]),
            _Option(["-clustering", "-CLUSTERING", "CLUSTERING", "clustering"],
                    "NJ or UPGMA",
                    checker_function=lambda x: x in ["NJ", "UPGMA", "nj", "upgma"]),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
327 328
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a short banner, runs doctest.testmod() with its default
    arguments, then prints "Done".
    """
    # A parenthesised single-argument print is valid both as the Python 2
    # print statement and as the Python 3 print function, and produces the
    # same output in each.
    print("Running ClustalW doctests...")
    import doctest
    doctest.testmod()
    print("Done")
if __name__ == "__main__":
    # Run the doctests only when this file is executed directly as a script,
    # not when it is imported.
    _test()