Package Bio :: Package Align :: Package Applications :: Module _Clustalw
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._Clustalw

  1  # Copyright 2009 by Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Command line wrapper for the multiple alignment program Clustal W. 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10  __docformat__ = "epytext en"  # Don't just use plain text in epydoc API pages! 
 11   
 12  import os 
 13  from Bio.Application import _Option, _Switch, AbstractCommandline 
 14   
 15   
class ClustalwCommandline(AbstractCommandline):
    """Command line wrapper for clustalw (version one or two).

    http://www.clustal.org/

    Example:

    >>> from Bio.Align.Applications import ClustalwCommandline
    >>> in_file = "unaligned.fasta"
    >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
    >>> print(clustalw_cline)
    clustalw2 -infile=unaligned.fasta

    You would typically run the command line with clustalw_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
    Bioinformatics, 23, 2947-2948.

    Last checked against versions: 1.83 and 2.1
    """

    # TODO - Should we default to cmd="clustalw2" now?
    def __init__(self, cmd="clustalw", **kwargs):
        """Declare the ClustalW parameters and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable; defaults to "clustalw".
         - kwargs - forwarded unchanged to AbstractCommandline.__init__
           (presumably initial values for the parameters declared below,
           as in the class docstring example; see Bio.Application).

        Every parameter is registered under four aliases: "-name",
        "-NAME", "NAME" and "name".  The order of self.parameters is
        kept exactly as declared here.
        """

        def names(key):
            """Return the four aliases used for the parameter ``key``."""
            upper = key.upper()
            return ["-" + key, "-" + upper, upper, key]

        def is_int(value):
            """Accept integer values only."""
            return isinstance(value, int)

        def is_number(value):
            """Accept integer or floating point values."""
            return isinstance(value, (int, float))

        def one_of(*keywords):
            """Build a checker accepting each keyword in all-upper or all-lower case."""
            allowed = [word.upper() for word in keywords]
            allowed += [word.lower() for word in keywords]
            return lambda value: value in allowed

        def one_of_or_file(*keywords):
            """Build a checker accepting a keyword or the path of an existing file."""
            is_keyword = one_of(*keywords)
            return lambda value: is_keyword(value) or os.path.exists(value)

        def existing_file(value):
            """Accept only paths that exist.

            The original checkers were ``lambda x: os.path.exists``, which
            returned the (always truthy) function object instead of
            calling it, so nothing was ever validated.
            """
            return os.path.exists(value)

        self.parameters = [
            _Option(names("infile"),
                    "Input sequences.",
                    filename=True),
            _Option(names("profile1"),
                    "Profiles (old alignment).",
                    filename=True),
            _Option(names("profile2"),
                    "Profiles (old alignment).",
                    filename=True),
            # ---------------- VERBS (do things) ----------------
            _Switch(names("options"),
                    "List the command line parameters"),
            _Switch(names("help"),
                    "Outline the command line params."),
            _Switch(names("check"),
                    "Outline the command line params."),
            _Switch(names("fullhelp"),
                    "Output full help content."),
            _Switch(names("align"),
                    "Do full multiple alignment."),
            _Switch(names("tree"),
                    "Calculate NJ tree."),
            _Switch(names("pim"),
                    "Output percent identity matrix (while calculating the tree)."),
            _Option(names("bootstrap"),
                    "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
                    checker_function=is_int),
            _Switch(names("convert"),
                    "Output the input sequences in a different file format."),
            # ---------------- PARAMETERS (set things) ----------------
            # ***General settings:****
            # NOTE: -interactive is deliberately not wrapped; entering the
            # interactive menus makes no sense when driven from Biopython.
            _Switch(names("quicktree"),
                    "Use FAST algorithm for the alignment guide tree"),
            _Option(names("type"),
                    "PROTEIN or DNA sequences",
                    checker_function=one_of("PROTEIN", "DNA")),
            _Switch(names("negative"),
                    "Protein alignment with negative values in matrix"),
            _Option(names("outfile"),
                    "Output sequence alignment file name",
                    filename=True),
            _Option(names("output"),
                    "Output format: CLUSTAL(default), GCG, GDE, PHYLIP, PIR, NEXUS and FASTA",
                    checker_function=one_of("CLUSTAL", "GCG", "GDE", "PHYLIP",
                                            "PIR", "NEXUS", "FASTA")),
            _Option(names("outorder"),
                    "Output taxon order: INPUT or ALIGNED",
                    checker_function=one_of("INPUT", "ALIGNED")),
            _Option(names("case"),
                    "LOWER or UPPER (for GDE output only)",
                    checker_function=one_of("UPPER", "LOWER")),
            _Option(names("seqnos"),
                    "OFF or ON (for Clustal output only)",
                    checker_function=one_of("ON", "OFF")),
            _Option(names("seqno_range"),
                    "OFF or ON (NEW- for all output formats)",
                    checker_function=one_of("ON", "OFF")),
            _Option(names("range"),
                    "Sequence range to write starting m to m+n. "
                    "Input as string eg. '24,200'"),
            _Option(names("maxseqlen"),
                    "Maximum allowed input sequence length",
                    checker_function=is_int),
            _Switch(names("quiet"),
                    "Reduce console output to minimum"),
            _Option(names("stats"),
                    "Log some alignment statistics to file",
                    filename=True),
            # ***Fast Pairwise Alignments:***
            _Option(names("ktuple"),
                    "Word size",
                    checker_function=is_number),
            _Option(names("topdiags"),
                    "Number of best diags.",
                    checker_function=is_number),
            _Option(names("window"),
                    "Window around best diags.",
                    checker_function=is_number),
            _Option(names("pairgap"),
                    "Gap penalty",
                    checker_function=is_number),
            _Option(names("score"),
                    "Either: PERCENT or ABSOLUTE",
                    checker_function=one_of("PERCENT", "ABSOLUTE")),
            # ***Slow Pairwise Alignments:***
            _Option(names("pwmatrix"),
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                    checker_function=one_of_or_file("BLOSUM", "PAM",
                                                    "GONNET", "ID"),
                    filename=True),
            _Option(names("pwdnamatrix"),
                    "DNA weight matrix=IUB, CLUSTALW or filename",
                    checker_function=one_of_or_file("IUB", "CLUSTALW"),
                    filename=True),
            _Option(names("pwgapopen"),
                    "Gap opening penalty",
                    checker_function=is_number),
            _Option(names("pwgapext"),
                    "Gap extension penalty",
                    checker_function=is_number),
            # ***Multiple Alignments:***
            _Option(names("newtree"),
                    "Output file name for newly created guide tree",
                    filename=True),
            _Option(names("usetree"),
                    "File name of guide tree",
                    checker_function=existing_file,
                    filename=True),
            _Option(names("matrix"),
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                    checker_function=one_of_or_file("BLOSUM", "PAM",
                                                    "GONNET", "ID"),
                    filename=True),
            _Option(names("dnamatrix"),
                    "DNA weight matrix=IUB, CLUSTALW or filename",
                    checker_function=one_of_or_file("IUB", "CLUSTALW"),
                    filename=True),
            _Option(names("gapopen"),
                    "Gap opening penalty",
                    checker_function=is_number),
            _Option(names("gapext"),
                    "Gap extension penalty",
                    checker_function=is_number),
            _Switch(names("endgaps"),
                    "No end gap separation pen."),
            _Option(names("gapdist"),
                    "Gap separation pen. range",
                    checker_function=is_number),
            _Switch(names("nopgap"),
                    "Residue-specific gaps off"),
            _Switch(names("nohgap"),
                    "Hydrophilic gaps off"),
            _Switch(names("hgapresidues"),
                    "List hydrophilic res."),
            _Option(names("maxdiv"),
                    "% ident. for delay",
                    checker_function=is_number),
            # NOTE: -type is listed a second time under Multiple Alignments
            # in the ClustalW help; it is already declared in the General
            # settings section above.
            _Option(names("transweight"),
                    "Transitions weighting",
                    checker_function=is_number),
            _Option(names("iteration"),
                    "NONE or TREE or ALIGNMENT",
                    checker_function=one_of("NONE", "TREE", "ALIGNMENT")),
            _Option(names("numiter"),
                    "maximum number of iterations to perform",
                    checker_function=is_int),
            _Switch(names("noweights"),
                    "Disable sequence weighting"),
            # ***Profile Alignments:***
            _Switch(names("profile"),
                    "Merge two alignments by profile alignment"),
            _Option(names("newtree1"),
                    "Output file name for new guide tree of profile1",
                    filename=True),
            _Option(names("newtree2"),
                    "Output file for new guide tree of profile2",
                    filename=True),
            _Option(names("usetree1"),
                    "File name of guide tree for profile1",
                    checker_function=existing_file,
                    filename=True),
            _Option(names("usetree2"),
                    "File name of guide tree for profile2",
                    checker_function=existing_file,
                    filename=True),
            # ***Sequence to Profile Alignments:***
            _Switch(names("sequences"),
                    "Sequentially add profile2 sequences to profile1 alignment"),
            # NOTE: -newtree and -usetree are listed again here in the
            # ClustalW help; they are already declared in the Multiple
            # Alignments section above.
            # ***Structure Alignments:***
            _Switch(names("nosecstr1"),
                    "Do not use secondary structure-gap penalty mask for profile 1"),
            _Switch(names("nosecstr2"),
                    "Do not use secondary structure-gap penalty mask for profile 2"),
            _Option(names("secstrout"),
                    "STRUCTURE or MASK or BOTH or NONE output in alignment file",
                    checker_function=one_of("STRUCTURE", "MASK",
                                            "BOTH", "NONE")),
            _Option(names("helixgap"),
                    "Gap penalty for helix core residues",
                    checker_function=is_number),
            _Option(names("strandgap"),
                    "gap penalty for strand core residues",
                    checker_function=is_number),
            _Option(names("loopgap"),
                    "Gap penalty for loop regions",
                    checker_function=is_number),
            _Option(names("terminalgap"),
                    "Gap penalty for structure termini",
                    checker_function=is_number),
            _Option(names("helixendin"),
                    "Number of residues inside helix to be treated as terminal",
                    checker_function=is_int),
            _Option(names("helixendout"),
                    "Number of residues outside helix to be treated as terminal",
                    checker_function=is_int),
            _Option(names("strandendin"),
                    "Number of residues inside strand to be treated as terminal",
                    checker_function=is_int),
            _Option(names("strandendout"),
                    "Number of residues outside strand to be treated as terminal",
                    checker_function=is_int),
            # ***Trees:***
            _Option(names("outputtree"),
                    "nj OR phylip OR dist OR nexus",
                    checker_function=one_of("NJ", "PHYLIP", "DIST", "NEXUS")),
            _Option(names("seed"),
                    "Seed number for bootstraps.",
                    checker_function=is_int),
            _Switch(names("kimura"),
                    "Use Kimura's correction."),
            _Switch(names("tossgaps"),
                    "Ignore positions with gaps."),
            _Option(names("bootlabels"),
                    "Node OR branch position of bootstrap values in tree display",
                    checker_function=one_of("NODE", "BRANCH")),
            _Option(names("clustering"),
                    "NJ or UPGMA",
                    checker_function=one_of("NJ", "UPGMA")),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)


def _test():
    """Execute the doctests embedded in this module (PRIVATE).

    A short banner is printed before the run and "Done" afterwards.
    """
    from doctest import testmod as run_doctests

    print("Running ClustalW doctests...")
    run_doctests()
    print("Done")

# Run the module's doctests when this file is executed directly as a script.
if __name__ == "__main__":
    _test()