Package Bio :: Package Align :: Package Applications :: Module _ClustalOmega
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._ClustalOmega

  1  # -*- coding: utf-8 -*- 
  2  # Copyright 2011 by Andreas Wilm. All rights reserved. 
  3  # Based on ClustalW wrapper copyright 2009 by Cymon J. Cox. 
  4  # 
  5  # Wrapper for Clustal Omega by Andreas Wilm (2011). Used _Clustalw.py 
  6  # as template. 
  7  # 
  8  # This code is part of the Biopython distribution and governed by its 
  9  # license.  Please see the LICENSE file that should have been included 
 10  # as part of this package. 
 11  """Command line wrapper for the multiple alignment program Clustal Omega. 
 12  """ 
 13   
 14  from __future__ import print_function 
 15   
 16  __docformat__ = "restructuredtext en"  # Don't just use plain text in epydoc API pages! 
 17   
 18  from Bio.Application import _Option, _Switch, AbstractCommandline 
 19   
 20   
class ClustalOmegaCommandline(AbstractCommandline):
    """Command line wrapper for clustal omega

    http://www.clustal.org/omega

    Example:
    --------

    >>> from Bio.Align.Applications import ClustalOmegaCommandline
    >>> in_file = "unaligned.fasta"
    >>> out_file = "aligned.fasta"
    >>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True)
    >>> print(clustalomega_cline)
    clustalo -i unaligned.fasta -o aligned.fasta --auto -v


    You would typically run the command line with clustalomega_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:
    ---------

    Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R,
    McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011).
    Fast, scalable generation of high-quality protein multiple
    sequence alignments using Clustal Omega.
    Molecular Systems Biology 7:539 doi:10.1038/msb.2011.75

    Last checked against versions: 1.2.0
    """

    def __init__(self, cmd="clustalo", **kwargs):
        """Declare the supported clustalo options and initialise the wrapper.

        :param cmd: name or path of the executable (default ``"clustalo"``).
        :param kwargs: initial option values keyed by the last name of each
            parameter declared below (e.g. ``infile``, ``outfile``,
            ``verbose``); forwarded unchanged to
            ``AbstractCommandline.__init__``.
        """
        # Sequence file formats accepted by both --infmt and --outfmt.
        # Kept as a tuple (not a set) so that unhashable values are simply
        # rejected by the membership test instead of raising TypeError.
        seq_formats = ("a2m", "fa", "fasta",
                       "clu", "clustal",
                       "msf",
                       "phy", "phylip",
                       "selex",
                       "st", "stockholm",
                       "vie", "vienna")

        def _is_int(value):
            """Return True if value is an integer."""
            return isinstance(value, int)

        def _is_seq_format(value):
            """Return True if value is one of the known format keywords."""
            return value in seq_formats

        def _is_seq_type(value):
            """Return True if value names a sequence type, ignoring case.

            Accepts every spelling the previous hard-coded whitelist did
            ("protein", "Protein", "PROTEIN", "rna", "RNA", "dna", "DNA")
            plus the remaining case variants such as "Rna" or "Dna".
            Non-string values are rejected rather than raising.
            """
            return (hasattr(value, "lower") and
                    value.lower() in ("protein", "rna", "dna"))

        # order parameters in the same order as clustalo --help
        self.parameters = \
            [
                # Sequence Input
                _Option(["-i", "--in", "--infile", "infile"],
                        "Multiple sequence input file",
                        filename=True,
                        equate=False),
                # NOTE(review): "HMM input" looks like a description that
                # ended up in the names list by mistake; it is kept so that
                # the set of accepted aliases stays unchanged.
                _Option(["--hmm-in", "HMM input", "hmm_input"],
                        "HMM input files",
                        filename=True,
                        equate=False),
                _Switch(["--dealign", "dealign"],
                        "Dealign input sequences"),
                _Option(["--profile1", "--p1", "profile1"],
                        "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                        filename=True,
                        equate=False),
                _Option(["--profile2", "--p2", "profile2"],
                        "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                        filename=True,
                        equate=False),
                _Option(["-t", "--seqtype", "seqtype"],
                        "{Protein, RNA, DNA} Force a sequence type (default: auto).",
                        equate=False,
                        checker_function=_is_seq_type),
                _Switch(["--is-profile", "isprofile"],
                        "disable check if profile, force profile (default no)"),
                _Option(["--infmt", "infmt"],
                        """Forced sequence input file format (default: auto)

                        Allowed values: a2m, fa[sta], clu[stal], msf, phy[lip], selex, st[ockholm], vie[nna]
                        """,
                        equate=False,
                        checker_function=_is_seq_format),

                # Clustering
                _Option(["--distmat-in", "distmat_in"],
                        "Pairwise distance matrix input file (skips distance computation).",
                        filename=True,
                        equate=False),
                _Option(["--distmat-out", "distmat_out"],
                        "Pairwise distance matrix output file.",
                        filename=True,
                        equate=False),
                _Option(["--guidetree-in", "guidetree_in"],
                        "Guide tree input file (skips distance computation and guide-tree clustering step).",
                        filename=True,
                        equate=False),
                _Option(["--guidetree-out", "guidetree_out"],
                        "Guide tree output file.",
                        filename=True,
                        equate=False),
                _Switch(["--full", "distmat_full"],
                        "Use full distance matrix for guide-tree calculation (slow; mBed is default)"),
                _Switch(["--full-iter", "distmat_full_iter"],
                        "Use full distance matrix for guide-tree calculation during iteration (mBed is default)"),
                _Option(["--cluster-size", "clustersize"],
                        "soft maximum of sequences in sub-clusters",
                        checker_function=_is_int),
                _Option(["--clustering-out", "clusteringout"],
                        "Clustering output file",
                        filename=True),
                _Switch(["--use-kimura", "usekimura"],
                        "use Kimura distance correction for aligned sequences (default no)"),
                _Switch(["--percent-id", "percentid"],
                        "convert distances into percent identities (default no)"),

                # Alignment Output
                _Option(["-o", "--out", "--outfile", "outfile"],
                        "Multiple sequence alignment output file (default: stdout).",
                        filename=True,
                        equate=False),
                _Option(["--outfmt", "outfmt"],
                        "MSA output file format:"
                        " a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]"
                        " (default: fasta).",
                        equate=False,
                        checker_function=_is_seq_format),
                _Switch(["--residuenumber", "--resno", "residuenumber"],
                        "in Clustal format print residue numbers (default no)"),
                _Option(["--wrap", "wrap"],
                        "number of residues before line-wrap in output",
                        checker_function=_is_int),
                _Option(["--output-order", "outputorder"],
                        "MSA output order like in input/guide-tree",
                        checker_function=lambda x: x in ["input-order", "tree-order"]),

                # Iteration
                _Option(["--iterations", "--iter", "iterations"],
                        "Number of (combined guide-tree/HMM) iterations",
                        equate=False,
                        checker_function=_is_int),
                _Option(["--max-guidetree-iterations", "max_guidetree_iterations"],
                        "Maximum number of guidetree iterations",
                        equate=False,
                        checker_function=_is_int),
                _Option(["--max-hmm-iterations", "max_hmm_iterations"],
                        "Maximum number of HMM iterations",
                        equate=False,
                        checker_function=_is_int),

                # Limits (will exit early, if exceeded):
                _Option(["--maxnumseq", "maxnumseq"],
                        "Maximum allowed number of sequences",
                        equate=False,
                        checker_function=_is_int),
                _Option(["--maxseqlen", "maxseqlen"],
                        "Maximum allowed sequence length",
                        equate=False,
                        checker_function=_is_int),

                # Miscellaneous:
                _Switch(["--auto", "auto"],
                        "Set options automatically (might overwrite some of your options)"),
                _Option(["--threads", "threads"],
                        "Number of processors to use",
                        equate=False,
                        checker_function=_is_int),
                _Option(["-l", "--log", "log"],
                        "Log all non-essential output to this file.",
                        filename=True,
                        equate=False),
                _Switch(["-h", "--help", "help"],
                        "Print help and exit."),
                _Switch(["-v", "--verbose", "verbose"],
                        "Verbose output"),
                _Switch(["--version", "version"],
                        "Print version information and exit"),
                _Switch(["--long-version", "long_version"],
                        "Print long version information and exit"),
                _Switch(["--force", "force"],
                        "Force file overwriting."),

            ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
202 203
def _test():
    """Execute this module's doctests and report progress (PRIVATE).

    Prints a start banner, runs ``doctest.testmod()`` and finally
    prints "Done".
    """
    import doctest

    print("Running ClustalOmega doctests...")
    doctest.testmod()
    print("Done")


# Run the doctests only when this file is executed directly as a script.
if __name__ == "__main__":
    _test()