Package Bio :: Package motifs :: Package applications :: Module _xxmotif
[hide private]
[frames] | [no frames]

Source Code for Module Bio.motifs.applications._xxmotif

  1  # -*- coding: utf-8 -*- 
  2  # Copyright 2012 by Christian Brueffer.  All rights reserved. 
  3  # 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7  """Command line wrapper for the motif finding program XXmotif.""" 
  8   
  9  from __future__ import print_function 
 10   
 11  import os 
 12  from Bio.Application import AbstractCommandline, _Option, _Switch, _Argument 
 13   
 14   
class XXmotifCommandline(AbstractCommandline):
    """Command line wrapper for XXmotif.

    http://xxmotif.genzentrum.lmu.de/

    Example:

    >>> from Bio.motifs.applications import XXmotifCommandline
    >>> out_dir = "results"
    >>> in_file = "sequences.fasta"
    >>> xxmotif_cline = XXmotifCommandline(outdir=out_dir, seqfile=in_file, revcomp=True)
    >>> print(xxmotif_cline)
    XXmotif results sequences.fasta --revcomp

    You would typically run the command line with xxmotif_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citations:

    Luehr S, Hartmann H, and Söding J. The XXmotif web server for eXhaustive,
    weight matriX-based motif discovery in nucleotide sequences,
    Nucleic Acids Res. 40: W104-W109 (2012).

    Hartmann H, Guthoehrlein EW, Siebert M., Luehr S, and Söding J. P-value
    based regulatory motif discovery using positional weight matrices
    (to be published)

    Last checked against version: 1.3
    """

    def __init__(self, cmd="XXmotif", **kwargs):
        """Declare the XXmotif parameters and initialize the wrapper.

        Arguments:
         - cmd - name or path of the XXmotif executable (default "XXmotif").
         - kwargs - parameter values, forwarded unchanged to
           ``AbstractCommandline.__init__`` together with ``cmd``.

        The two positional arguments (``outdir`` and ``seqfile``) are
        declared as required; everything else is optional.
        """
        # order of parameters is the same as in XXmotif --help
        _valid_alphabet = set("ACGTNX")

        self.parameters = [
            _Argument(
                ["outdir", "OUTDIR"],
                "output directory for all results",
                filename=True,
                is_required=True,
                # XXmotif currently does not accept spaces in the outdir name
                checker_function=lambda x: " " not in x,
            ),
            _Argument(
                ["seqfile", "SEQFILE"],
                "file name with sequences from positive set in FASTA format",
                filename=True,
                is_required=True,
                # XXmotif currently only accepts a pure filename
                checker_function=lambda x: os.path.split(x)[0] == "",
            ),

            # Options
            _Option(
                ["--negSet", "negSet", "NEGSET", "negset"],
                "sequence set which has to be used as a reference set",
                filename=True,
                equate=False,
            ),
            _Switch(
                ["--zoops", "ZOOPS", "zoops"],
                "use zero-or-one occurrence per sequence model (DEFAULT)",
            ),
            _Switch(
                ["--mops", "MOPS", "mops"],
                "use multiple occurrence per sequence model",
            ),
            _Switch(
                ["--oops", "OOPS", "oops"],
                "use one occurrence per sequence model",
            ),
            _Switch(
                ["--revcomp", "REVCOMP", "revcomp"],
                "search in reverse complement of sequences as well (DEFAULT: NO)",
            ),
            _Option(
                [
                    "--background-model-order",
                    "background-model-order",
                    "BACKGROUND-MODEL-ORDER",
                    "background_model_order",
                ],
                "order of background distribution (DEFAULT: 2, 8(--negset) )",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Option(
                ["--pseudo", "PSEUDO", "pseudo"],
                "percentage of pseudocounts used (DEFAULT: 10)",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Option(
                ["-g", "--gaps", "GAPS", "gaps"],
                "maximum number of gaps used for start seeds [0-3] (DEFAULT: 0)",
                # The original check was ``x in [0-3]``, i.e. ``x in [-3]``,
                # which rejected every value in the documented 0..3 range.
                checker_function=lambda x: isinstance(x, int) and 0 <= x <= 3,
                equate=False,
            ),
            _Option(
                ["--type", "TYPE", "type"],
                "defines what kind of start seeds are used (DEFAULT: ALL) "
                "possible types: ALL, FIVEMERS, PALINDROME, TANDEM, NOPALINDROME, NOTANDEM",
                checker_function=lambda x: x in [
                    "ALL", "all",
                    "FIVEMERS", "fivemers",
                    "PALINDROME", "palindrome",
                    "TANDEM", "tandem",
                    "NOPALINDROME", "nopalindrome",
                    "NOTANDEM", "notandem",
                ],
                equate=False,
            ),
            _Option(
                [
                    "--merge-motif-threshold",
                    "merge-motif-threshold",
                    "MERGE-MOTIF-THRESHOLD",
                    "merge_motif_threshold",
                ],
                "defines the similarity threshold for merging motifs (DEFAULT: HIGH) "
                "possible modes: LOW, MEDIUM, HIGH",
                checker_function=lambda x: x in [
                    "LOW", "low",
                    "MEDIUM", "medium",
                    "HIGH", "high",
                ],
                equate=False,
            ),
            _Switch(
                [
                    "--no-pwm-length-optimization",
                    "no-pwm-length-optimization",
                    "NO-PWM-LENGTH-OPTIMIZATION",
                    "no_pwm_length_optimization",
                ],
                "do not optimize length during iterations (runtime advantages)",
            ),
            _Option(
                [
                    "--max-match-positions",
                    "max-match-positions",
                    "MAX-MATCH-POSITIONS",
                    "max_match_positions",
                ],
                "max number of positions per motif "
                "(DEFAULT: 17, higher values will lead to very long runtimes)",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Switch(
                ["--batch", "BATCH", "batch"],
                "suppress progress bars (reduce output size for batch jobs)",
            ),
            _Option(
                ["--maxPosSetSize", "maxPosSetSize", "MAXPOSSETSIZE", "maxpossetsize"],
                "maximum number of sequences from the positive set used [DEFAULT: all]",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            # XXmotif's --help switch is deliberately not wrapped: it does
            # not make sense in biopython.
            _Option(
                ["--trackedMotif", "trackedMotif", "TRACKEDMOTIF", "trackedmotif"],
                "inspect extensions and refinement of a given seed (DEFAULT: not used)",
                # NOTE(review): any() only requires ONE character from
                # _valid_alphabet; all() may have been intended, but switching
                # would reject inputs that are accepted today - confirm first.
                checker_function=lambda x: any((c in _valid_alphabet) for c in x),
                equate=False,
            ),

            # Using conservation information
            _Option(
                ["--format", "FORMAT", "format"],
                "defines what kind of format the input sequences have (DEFAULT: FASTA)",
                checker_function=lambda x: x in [
                    "FASTA", "fasta",
                    "MFASTA", "mfasta",
                ],
                equate=False,
            ),
            _Option(
                [
                    "--maxMultipleSequences",
                    "maxMultipleSequences",
                    "MAXMULTIPLESEQUENCES",
                    "maxmultiplesequences",
                ],
                "maximum number of sequences used in an alignment [DEFAULT: all]",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),

            # Using localization information
            _Switch(
                ["--localization", "LOCALIZATION", "localization"],
                "use localization information to calculate combined P-values "
                "(sequences should have all the same length)",
            ),
            _Option(
                ["--downstream", "DOWNSTREAM", "downstream"],
                "number of residues in positive set downstream of anchor point (DEFAULT: 0)",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),

            # Start with self defined motif
            _Option(
                ["-m", "--startMotif", "startMotif", "STARTMOTIF", "startmotif"],
                "Start motif (IUPAC characters)",
                # NOTE(review): same any()-based check as --trackedMotif; the
                # help text says IUPAC characters, so it is kept permissive.
                checker_function=lambda x: any((c in _valid_alphabet) for c in x),
                equate=False,
            ),
            _Option(
                ["-p", "--profileFile", "profileFile", "PROFILEFILE", "profilefile"],
                "profile file",
                filename=True,
                equate=False,
            ),
            _Option(
                ["--startRegion", "startRegion", "STARTREGION", "startregion"],
                "expected start position for motif occurrences "
                "relative to anchor point (--localization)",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Option(
                ["--endRegion", "endRegion", "ENDREGION", "endregion"],
                "expected end position for motif occurrences "
                "relative to anchor point (--localization)",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),

            # XXmotif wrapper options
            _Switch(
                ["--XXmasker", "masker"],
                "mask the input sequences for homology, repeats and low complexity regions",
            ),
            _Switch(
                ["--XXmasker-pos", "maskerpos"],
                "mask only the positive set for homology, repeats and low complexity regions",
            ),
            _Switch(
                ["--no-graphics", "nographics"],
                "run XXmotif without graphical output",
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
178 179
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a start banner, calls ``doctest.testmod()`` with its default
    arguments, then prints a completion message. Returns None.
    """
    from doctest import testmod

    print("Running XXmotif doctests...")
    testmod()
    print("Done")
# Run the doctests only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    _test()