Package Bio :: Package Align :: Package Applications :: Module _Clustalw
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._Clustalw

  1  # Copyright 2009 by Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Command line wrapper for the multiple alignment program Clustal W. 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10  __docformat__ = "restructuredtext en"  # Don't just use plain text in epydoc API pages! 
 11   
 12  import os 
 13  from Bio.Application import _Option, _Switch, AbstractCommandline 
 14   
 15   
def _is_int(value):
    """Return True if value is an int (option checker helper, PRIVATE)."""
    return isinstance(value, int)


def _is_number(value):
    """Return True if value is an int or a float (option checker helper, PRIVATE)."""
    return isinstance(value, (int, float))


def _one_of(*keywords):
    """Build a checker accepting each keyword in upper or lower case (PRIVATE).

    The keywords are given in upper case; the returned function accepts
    exactly those strings and their all-lower-case forms (mixed case is
    rejected).
    """
    allowed = tuple(keywords) + tuple(word.lower() for word in keywords)

    def checker(value):
        return value in allowed

    return checker


def _keyword_or_file(*keywords):
    """Build a checker accepting a keyword or an existing path (PRIVATE).

    Used for the weight matrix options, which take either one of the
    built-in matrix names or the name of a matrix file.
    """
    is_keyword = _one_of(*keywords)

    def checker(value):
        return is_keyword(value) or os.path.exists(value)

    return checker


class ClustalwCommandline(AbstractCommandline):
    """Command line wrapper for clustalw (version one or two).

    http://www.clustal.org/

    Example:
    --------

    >>> from Bio.Align.Applications import ClustalwCommandline
    >>> in_file = "unaligned.fasta"
    >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
    >>> print(clustalw_cline)
    clustalw2 -infile=unaligned.fasta

    You would typically run the command line with clustalw_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Each option is validated by the checker function declared for it in
    ``__init__``. In particular the guide tree inputs (usetree, usetree1
    and usetree2) must name an existing file.

    Citation:
    ---------

    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
    Bioinformatics, 23, 2947-2948.

    Last checked against versions: 1.83 and 2.1
    """

    # TODO - Should we default to cmd="clustalw2" now?
    def __init__(self, cmd="clustalw", **kwargs):
        """Declare the supported clustalw options and initialize the wrapper.

        Arguments:
         - cmd - name or path of the clustalw executable.
         - kwargs - option values, passed on to AbstractCommandline together
           with cmd.
        """
        # Checkers shared by many options below.
        protein_matrix = _keyword_or_file("BLOSUM", "PAM", "GONNET", "ID")
        dna_matrix = _keyword_or_file("IUB", "CLUSTALW")
        on_or_off = _one_of("ON", "OFF")

        self.parameters = [
            _Option(["-infile", "-INFILE", "INFILE", "infile"],
                    "Input sequences.",
                    filename=True),
            _Option(["-profile1", "-PROFILE1", "PROFILE1", "profile1"],
                    "Profiles (old alignment).",
                    filename=True),
            _Option(["-profile2", "-PROFILE2", "PROFILE2", "profile2"],
                    "Profiles (old alignment).",
                    filename=True),
            # ################# VERBS (do things) #############################
            _Switch(["-options", "-OPTIONS", "OPTIONS", "options"],
                    "List the command line parameters"),
            _Switch(["-help", "-HELP", "HELP", "help"],
                    "Outline the command line params."),
            _Switch(["-check", "-CHECK", "CHECK", "check"],
                    "Outline the command line params."),
            _Switch(["-fullhelp", "-FULLHELP", "FULLHELP", "fullhelp"],
                    "Output full help content."),
            _Switch(["-align", "-ALIGN", "ALIGN", "align"],
                    "Do full multiple alignment."),
            _Switch(["-tree", "-TREE", "TREE", "tree"],
                    "Calculate NJ tree."),
            _Switch(["-pim", "-PIM", "PIM", "pim"],
                    "Output percent identity matrix (while calculating the tree)."),
            _Option(["-bootstrap", "-BOOTSTRAP", "BOOTSTRAP", "bootstrap"],
                    "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
                    checker_function=_is_int),
            _Switch(["-convert", "-CONVERT", "CONVERT", "convert"],
                    "Output the input sequences in a different file format."),
            # #################### PARAMETERS (set things) #########################
            # ***General settings:****
            # The -interactive option is deliberately not wrapped: it makes
            # no sense in Biopython.
            _Switch(["-quicktree", "-QUICKTREE", "QUICKTREE", "quicktree"],
                    "Use FAST algorithm for the alignment guide tree"),
            _Option(["-type", "-TYPE", "TYPE", "type"],
                    "PROTEIN or DNA sequences",
                    checker_function=_one_of("PROTEIN", "DNA")),
            _Switch(["-negative", "-NEGATIVE", "NEGATIVE", "negative"],
                    "Protein alignment with negative values in matrix"),
            _Option(["-outfile", "-OUTFILE", "OUTFILE", "outfile"],
                    "Output sequence alignment file name",
                    filename=True),
            _Option(["-output", "-OUTPUT", "OUTPUT", "output"],
                    "Output format: CLUSTAL(default), GCG, GDE, PHYLIP, PIR, NEXUS and FASTA",
                    checker_function=_one_of("CLUSTAL", "GCG", "GDE", "PHYLIP",
                                             "PIR", "NEXUS", "FASTA")),
            _Option(["-outorder", "-OUTORDER", "OUTORDER", "outorder"],
                    "Output taxon order: INPUT or ALIGNED",
                    checker_function=_one_of("INPUT", "ALIGNED")),
            _Option(["-case", "-CASE", "CASE", "case"],
                    "LOWER or UPPER (for GDE output only)",
                    checker_function=_one_of("UPPER", "LOWER")),
            _Option(["-seqnos", "-SEQNOS", "SEQNOS", "seqnos"],
                    "OFF or ON (for Clustal output only)",
                    checker_function=on_or_off),
            _Option(["-seqno_range", "-SEQNO_RANGE", "SEQNO_RANGE", "seqno_range"],
                    "OFF or ON (NEW- for all output formats)",
                    checker_function=on_or_off),
            _Option(["-range", "-RANGE", "RANGE", "range"],
                    "Sequence range to write starting m to m+n. "
                    "Input as string eg. '24,200'"),
            _Option(["-maxseqlen", "-MAXSEQLEN", "MAXSEQLEN", "maxseqlen"],
                    "Maximum allowed input sequence length",
                    checker_function=_is_int),
            _Switch(["-quiet", "-QUIET", "QUIET", "quiet"],
                    "Reduce console output to minimum"),
            _Option(["-stats", "-STATS", "STATS", "stats"],
                    "Log some alignment statistics to file",
                    filename=True),
            # ***Fast Pairwise Alignments:***
            _Option(["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"],
                    "Word size",
                    checker_function=_is_number),
            _Option(["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"],
                    "Number of best diags.",
                    checker_function=_is_number),
            _Option(["-window", "-WINDOW", "WINDOW", "window"],
                    "Window around best diags.",
                    checker_function=_is_number),
            _Option(["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"],
                    "Gap penalty",
                    checker_function=_is_number),
            _Option(["-score", "-SCORE", "SCORE", "score"],
                    "Either: PERCENT or ABSOLUTE",
                    checker_function=_one_of("PERCENT", "ABSOLUTE")),
            # ***Slow Pairwise Alignments:***
            _Option(["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"],
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                    checker_function=protein_matrix,
                    filename=True),
            _Option(["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX", "pwdnamatrix"],
                    "DNA weight matrix=IUB, CLUSTALW or filename",
                    checker_function=dna_matrix,
                    filename=True),
            _Option(["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"],
                    "Gap opening penalty",
                    checker_function=_is_number),
            _Option(["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"],
                    "Gap extension penalty",
                    checker_function=_is_number),
            # ***Multiple Alignments:***
            _Option(["-newtree", "-NEWTREE", "NEWTREE", "newtree"],
                    "Output file name for newly created guide tree",
                    filename=True),
            # The guide tree is an input file, so it must already exist.
            # (Pass os.path.exists itself; wrapping it in a lambda that
            # merely returns the function would accept every value.)
            _Option(["-usetree", "-USETREE", "USETREE", "usetree"],
                    "File name of guide tree",
                    checker_function=os.path.exists,
                    filename=True),
            _Option(["-matrix", "-MATRIX", "MATRIX", "matrix"],
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                    checker_function=protein_matrix,
                    filename=True),
            _Option(["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"],
                    "DNA weight matrix=IUB, CLUSTALW or filename",
                    checker_function=dna_matrix,
                    filename=True),
            _Option(["-gapopen", "-GAPOPEN", "GAPOPEN", "gapopen"],
                    "Gap opening penalty",
                    checker_function=_is_number),
            _Option(["-gapext", "-GAPEXT", "GAPEXT", "gapext"],
                    "Gap extension penalty",
                    checker_function=_is_number),
            _Switch(["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"],
                    "No end gap separation pen."),
            _Option(["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"],
                    "Gap separation pen. range",
                    checker_function=_is_number),
            _Switch(["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"],
                    "Residue-specific gaps off"),
            _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"],
                    "Hydrophilic gaps off"),
            _Switch(["-hgapresidues", "-HGAPRESIDUES", "HGAPRESIDUES", "hgapresidues"],
                    "List hydrophilic res."),
            _Option(["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"],
                    "% ident. for delay",
                    checker_function=_is_number),
            # -type is already handled in the General Settings section, but
            # appears a second time under Multiple Alignments in the help.
            _Option(["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT", "transweight"],
                    "Transitions weighting",
                    checker_function=_is_number),
            _Option(["-iteration", "-ITERATION", "ITERATION", "iteration"],
                    "NONE or TREE or ALIGNMENT",
                    checker_function=_one_of("NONE", "TREE", "ALIGNMENT")),
            _Option(["-numiter", "-NUMITER", "NUMITER", "numiter"],
                    "maximum number of iterations to perform",
                    checker_function=_is_int),
            _Switch(["-noweights", "-NOWEIGHTS", "NOWEIGHTS", "noweights"],
                    "Disable sequence weighting"),
            # ***Profile Alignments:***
            _Switch(["-profile", "-PROFILE", "PROFILE", "profile"],
                    "Merge two alignments by profile alignment"),
            _Option(["-newtree1", "-NEWTREE1", "NEWTREE1", "newtree1"],
                    "Output file name for new guide tree of profile1",
                    filename=True),
            _Option(["-newtree2", "-NEWTREE2", "NEWTREE2", "newtree2"],
                    "Output file for new guide tree of profile2",
                    filename=True),
            _Option(["-usetree1", "-USETREE1", "USETREE1", "usetree1"],
                    "File name of guide tree for profile1",
                    checker_function=os.path.exists,
                    filename=True),
            _Option(["-usetree2", "-USETREE2", "USETREE2", "usetree2"],
                    "File name of guide tree for profile2",
                    checker_function=os.path.exists,
                    filename=True),
            # ***Sequence to Profile Alignments:***
            _Switch(["-sequences", "-SEQUENCES", "SEQUENCES", "sequences"],
                    "Sequentially add profile2 sequences to profile1 alignment"),
            # -newtree and -usetree are already handled in the Multiple
            # Alignments section, but appear a second time here in the help.
            # ***Structure Alignments:***
            _Switch(["-nosecstr1", "-NOSECSTR1", "NOSECSTR1", "nosecstr1"],
                    "Do not use secondary structure-gap penalty mask for profile 1"),
            _Switch(["-nosecstr2", "-NOSECSTR2", "NOSECSTR2", "nosecstr2"],
                    "Do not use secondary structure-gap penalty mask for profile 2"),
            _Option(["-secstrout", "-SECSTROUT", "SECSTROUT", "secstrout"],
                    "STRUCTURE or MASK or BOTH or NONE output in alignment file",
                    checker_function=_one_of("STRUCTURE", "MASK",
                                             "BOTH", "NONE")),
            _Option(["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"],
                    "Gap penalty for helix core residues",
                    checker_function=_is_number),
            _Option(["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"],
                    "gap penalty for strand core residues",
                    checker_function=_is_number),
            _Option(["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"],
                    "Gap penalty for loop regions",
                    checker_function=_is_number),
            _Option(["-terminalgap", "-TERMINALGAP", "TERMINALGAP", "terminalgap"],
                    "Gap penalty for structure termini",
                    checker_function=_is_number),
            _Option(["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"],
                    "Number of residues inside helix to be treated as terminal",
                    checker_function=_is_int),
            _Option(["-helixendout", "-HELIXENDOUT", "HELIXENDOUT", "helixendout"],
                    "Number of residues outside helix to be treated as terminal",
                    checker_function=_is_int),
            _Option(["-strandendin", "-STRANDENDIN", "STRANDENDIN", "strandendin"],
                    "Number of residues inside strand to be treated as terminal",
                    checker_function=_is_int),
            _Option(["-strandendout", "-STRANDENDOUT", "STRANDENDOUT", "strandendout"],
                    "Number of residues outside strand to be treated as terminal",
                    checker_function=_is_int),
            # ***Trees:***
            _Option(["-outputtree", "-OUTPUTTREE", "OUTPUTTREE", "outputtree"],
                    "nj OR phylip OR dist OR nexus",
                    checker_function=_one_of("NJ", "PHYLIP", "DIST", "NEXUS")),
            _Option(["-seed", "-SEED", "SEED", "seed"],
                    "Seed number for bootstraps.",
                    checker_function=_is_int),
            _Switch(["-kimura", "-KIMURA", "KIMURA", "kimura"],
                    "Use Kimura's correction."),
            _Switch(["-tossgaps", "-TOSSGAPS", "TOSSGAPS", "tossgaps"],
                    "Ignore positions with gaps."),
            _Option(["-bootlabels", "-BOOTLABELS", "BOOTLABELS", "bootlabels"],
                    "Node OR branch position of bootstrap values in tree display",
                    checker_function=_one_of("NODE", "BRANCH")),
            _Option(["-clustering", "-CLUSTERING", "CLUSTERING", "clustering"],
                    "NJ or UPGMA",
                    checker_function=_one_of("NJ", "UPGMA")),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
331 332
def _test():
    """Execute this module's doctests, announcing start and finish (PRIVATE)."""
    from doctest import testmod

    print("Running ClustalW doctests...")
    # With no arguments testmod() examines the __main__ module.
    testmod()
    print("Done")


# Only run the doctests when this file is executed as a script.
if __name__ == "__main__":
    _test()