Package jazzparser :: Package utils :: Module scripts
[hide private]
[frames] | no frames]

Source Code for Module jazzparser.utils.scripts

  1  """Utilities for bin scripts. 
  2   
  3  These are used by scripts outside the Jazz Parser packages, in  
  4  <PROJECT_ROOT>/bin. 
  5   
  6  """ 
  7  """ 
  8  ============================== License ======================================== 
  9   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
 10    
 11   This file is part of The Jazz Parser. 
 12    
 13   The Jazz Parser is free software: you can redistribute it and/or modify 
 14   it under the terms of the GNU General Public License as published by 
 15   the Free Software Foundation, either version 3 of the License, or 
 16   (at your option) any later version. 
 17    
 18   The Jazz Parser is distributed in the hope that it will be useful, 
 19   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 21   GNU General Public License for more details. 
 22    
 23   You should have received a copy of the GNU General Public License 
 24   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
 25   
 26  ============================ End license ====================================== 
 27   
 28  """ 
 29  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"  
 30   
def prepare_evaluation_options(usage=None, description=None,
        optparse_options=(), check_args=None, optparse_groups=()):
    """
    Various tasks common to the initial part of the evaluation routine
    scripts (C{models/eval.py}): builds the option parser, parses the
    command line, initializes logging, loads the input sequences and
    splits them up according to the partitioning options.

    @todo: This is not used any more. Remove it, after checking it's definitely
        not used.

    @param usage: the optparse usage string
    @param description: the optparse description string
    @type optparse_options: list of tuples
    @param optparse_options: (args,kwargs) pairs to add additional
        options to the optparse parser.
    @type check_args: function
    @param check_args: function to take the command-line arguments and
        check them. This will be called early in the script. Must
        return a tuple of (1) the model name (or model basename) that
        will be used in the partition model names and (2) the input
        filename to get sequences from.
    @type optparse_groups: list of pairs
    @param optparse_groups: specifications for option groups to add to the
        optparse option parser. The first of each pair is a tuple of
        args to C{OptionGroup}'s init (excluding the first).
        The second is a list of options
        each formatted as C{optparse_options}.

    @rtype: tuple
    @return: (1) list of (sequences,model_name,partition_index) tuples
        for each partition; (2) the sequence ids: with C{--partitions},
        whatever C{partition} returns for the full id list (one entry per
        partition); with C{--partition}, only the selected partition's
        entry; with C{--sequence}, a one-element list holding that
        sequence's id; otherwise C{[None]}; (3) optparse options;
        (4) optparse arguments.

    @raise ValueError: if C{check_args} is None (raised after the command
        line has been parsed).

    """
    # logging is needed for logging.DEBUG below; it is imported here, like
    # the other dependencies, so the function does not rely on a
    # module-level import.
    import logging
    import sys
    from optparse import OptionParser, OptionGroup
    from jazzparser.utils.config import parse_args_with_config
    from jazzparser.utils.loggers import init_logging
    from jazzparser.data.db_mirrors import SequenceIndex
    from jazzparser.utils.data import partition

    parser = OptionParser(usage=usage, description=description)
    group = OptionGroup(parser, "Input", "Input data and partitioning for evaluation")
    group.add_option("-s", "--sequence", dest="sequence", action="store", help="limit the evaluation to just one sequence, with the given index in the input file")
    group.add_option("--partition", dest="partition", action="store", help="restrict to only one partition of the data. Specify as i/n, where i is the partition number and n the total number of partitions.")
    group.add_option("-p", "--partitions", dest="partitions", type="int", action="store", help="test on all n partitions of the data, using a different model for each. Will look for a model <NAME>i, where <NAME> is the given model name and i the partition number.")
    parser.add_option_group(group)

    parser.add_option("--debug", dest="debug", action="store_true", help="show debugging output")

    # Add the caller's extra options according to their specs
    for args, kwargs in optparse_options:
        parser.add_option(*args, **kwargs)

    # Add the caller's groups and their options
    for group_args, group_options in optparse_groups:
        # Reuse a group with the same title if one already exists (e.g. the
        # "Input" group created above), otherwise create a new one
        same_titles = [g for g in parser.option_groups if g.title == group_args[0]]
        if same_titles:
            group = same_titles[0]
        else:
            group = OptionGroup(parser, *group_args)
            parser.add_option_group(group)
        # Add options to this group
        for args, kwargs in group_options:
            group.add_option(*args, **kwargs)
    options, arguments = parse_args_with_config(parser)

    if check_args is None:
        raise ValueError("could not check arguments and get model "
                         "name. check_args must not be None")
    model_name, input_filename = check_args(arguments)

    if options.debug:
        # Set the log level to debug and do the standard logging init
        init_logging(logging.DEBUG)
    else:
        init_logging()

    # Load up sequences
    seqs = SequenceIndex.from_file(input_filename)

    ################ Data partitioning ####################
    if options.partitions is not None:
        # Divide the data up into n partitions and use a different model name for each
        total_parts = options.partitions
        sys.stderr.write("Cross validation: dividing test data into %d partitions\n" % total_parts)
        partitions = [
            (part, "%s%d" % (model_name, i), i)
            for i, part in enumerate(partition(seqs.sequences, total_parts))
        ]
        part_ids = partition(seqs.ids, total_parts)
    elif options.partition is not None:
        # Just select one partition
        # Split up the "i/n" argument to get two integers
        parti, total_parts = options.partition.split("/")
        parti, total_parts = int(parti), int(total_parts)
        sys.stderr.write("Restricting sequences to %d-way partition %d\n" % (total_parts, parti))
        # Get a list of sequence indices to restrict our set to
        # NOTE(review): unlike the --partitions branch, this is the selected
        # partition's ids alone, not a list with one entry per partition --
        # confirm callers expect that before changing it.
        part_ids = partition(seqs.ids, total_parts)[parti]
        # Build the tuples for every partition and pick the requested one, so
        # that the model name and index match those used by --partitions
        all_partitions = [
            (part, "%s%d" % (model_name, i), i)
            for i, part in enumerate(partition(seqs.sequences, total_parts))
        ]
        partitions = [all_partitions[parti]]
    elif options.sequence is not None:
        # Just select one sequence
        seq = seqs.sequence_by_index(int(options.sequence))
        if seq is None:
            # The lookup found nothing for this index: report and give up
            sys.stderr.write("There are only %d sequences\n" % len(seqs))
            sys.exit(1)
        partitions = [([seq], model_name, 0)]
        part_ids = [seq.id]
    else:
        # Don't partition the sequences
        partitions = [(seqs.sequences, model_name, 0)]
        part_ids = [None]

    return partitions, part_ids, options, arguments