jazzparser.backoff.base

1 """Base classes for grammarless tonal space models. 2 3 """ 4 """ 5 ============================== License ======================================== 6 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 7 8 This file is part of The Jazz Parser. 9 10 The Jazz Parser is free software: you can redistribute it and/or modify 11 it under the terms of the GNU General Public License as published by 12 the Free Software Foundation, either version 3 of the License, or 13 (at your option) any later version. 14 15 The Jazz Parser is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 19 20 You should have received a copy of the GNU General Public License 21 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>. 22 23 ============================ End license ====================================== 24 25 """ 26 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>" 27 28 import os 29 from jazzparser import settings 30 from jazzparser.taggers.models import TaggerModel 31 from jazzparser.utils.options import ModuleOption 32 from jazzparser.data.input import detect_input_type 33 from jazzparser.utils.loggers import create_plain_stderr_logger

class BackoffModel(TaggerModel):
    """
    A model to be used by a L{BackoffBuilder}.

    The model-handling interface is inherited directly from
    L{TaggerModel<jazzparser.taggers.models.TaggerModel>} and simply
    stores the models to a different location.

    @note: if you're subclassing this, take a look at the
        L{TaggerModel<jazzparser.taggers.models.TaggerModel>} interface
        for requirements.

    """
    @classmethod
    def _get_model_dir(cls):
        """
        Builds the directory path for this model type by joining
        C{settings.BACKOFF_MODEL_DATA_DIR} with the class's C{MODEL_TYPE}.

        @rtype: str
        @return: path of the directory for models of this type
        @raise NotImplementedError: if the class's C{MODEL_TYPE} is C{None}

        """
        if cls.MODEL_TYPE is None:
            # Call form of raise: valid on both Python 2 and Python 3
            raise NotImplementedError(
                "cannot load model: %s has not set a model type name"
                % cls.__name__)
        return os.path.join(settings.BACKOFF_MODEL_DATA_DIR, cls.MODEL_TYPE)

53

class BackoffBuilder(object):
    """
    Defines the interface and common functions for models that assign
    a semantics directly to an input sequence.

    The evaluation interface is similar to the
    L{Tagger<jazzparser.taggers.Tagger>} interface.

    @note: the constructor reads C{self.INPUT_TYPES}, which this base
        class does not define here; subclasses are presumably expected
        to provide it.

    """
    # When subclassing, make sure to include this in the options if you override
    BUILDER_OPTIONS = []

    def __init__(self, input, options=None, logger=None):
        """
        @param input: the input to build a path for. It is passed through
            C{detect_input_type}, restricted to C{self.INPUT_TYPES}, and
            the processed result is what gets stored on the instance.
        @type options: dict
        @param options: option values, validated against
            C{BUILDER_OPTIONS} by L{check_options}. Defaults to no options.
        @param logger: logger to report to. If C{None}, a plain stderr
            logger is created.

        """
        # Use a fresh dict per call: a mutable default would be shared
        # between every instance constructed without explicit options
        if options is None:
            options = {}
        # Initialize using builder-specific options
        self.options = type(self).check_options(options)
        # Check what input type we've received and preprocess it.
        # The detected type itself is not needed here.
        _datatype, input = detect_input_type(input, allowed=self.INPUT_TYPES)
        # Store this for the subclass to use as appropriate
        self.input = input
        # Note that this is the processed input, not the raw constructor arg
        self.original_input = input
        # Subclasses may redefine self.input to taste
        # We keep the original wrapped input somewhere where it's sure to remain
        self.wrapped_input = input
        # Make sure we have some logger
        if logger is None:
            # Output to stderr instead
            self.logger = create_plain_stderr_logger()
        else:
            self.logger = logger

    @classmethod
    def check_options(cls, options):
        """
        Processes a dictionary of option values against the class's
        C{BUILDER_OPTIONS} using C{ModuleOption.process_option_dict} and
        returns the result.

        """
        return ModuleOption.process_option_dict(options, cls.BUILDER_OPTIONS)

    @property
    def num_paths(self):
        """
        Number of paths available from L{get_tonal_space_path}. The base
        implementation always reports 0.

        """
        return 0

    def _get_name(self):
        """
        Returns the last dotted component of the name of the module in
        which the instance's class is defined.

        """
        return type(self).__module__.rpartition(".")[2]
    name = property(_get_name)

    def _get_input_length(self):
        """
        Should return the number of words (chords) in the input, or
        some other measure of input length appropriate to the type of
        input.

        The default is C{len(self.input)}.

        """
        return len(self.input)
    input_length = property(_get_input_length)

    def get_tonal_space_path(self, rank=0):
        """
        This is the main interface method.

        @type rank: int
        @param rank: the rank of the path to get, where 0 is the
            highest ranked path

        @rtype: L{jazzparser.formalisms.base.semantics.lambdacalc.Semantics}
            subclass instance
        @return: the C{rank}th highest ranked path through the tonal
            space for this sequence. Returns C{None} if there is no
            path with this rank.
        @raise NotImplementedError: always, in this base class

        """
        # Call form of raise: valid on both Python 2 and Python 3
        raise NotImplementedError(
            "called get_tonal_space_path() on "
            "base BackoffBuilder instance.")

    def get_all_paths(self):
        """
        Gets a list of all the tonal space paths, highest rank first.
        Just a convenience method to get all the paths using
        L{get_tonal_space_path} for every rank (self.num_paths).

        """
        return [self.get_tonal_space_path(i) for i in range(self.num_paths)]

133

class ModelBackoffBuilder(BackoffBuilder):
    """
    Subclass of L{BackoffBuilder} that handles model loading.

    On construction, the model name is taken from the C{model} option
    (combined with the C{partition} option, if given, by
    L{partition_model_name}) and the model is loaded through
    C{MODEL_CLASS.load_model}. The results are stored as
    C{self.model_name} and C{self.model}.

    """
    MODEL_CLASS = None      # This should be set by subclasses

    BUILDER_OPTIONS = BackoffBuilder.BUILDER_OPTIONS + [
        ModuleOption('model', filter=str,
            help_text="Model name. This model must have been previously trained. Required",
            usage="model=X, where X is the name of a trained model",
            required=True),
        ModuleOption('partition', filter=int,
            help_text="If given, the numbered partition of the partitioned "
                "model will be used. (This generally involves appending the "
                "partition number to the model name.)",
            usage="partition=P, where P is an int",
            default=None
        ),
    ]

    def __init__(self, *args, **kwargs):
        """
        Accepts the same arguments as L{BackoffBuilder.__init__}, then
        works out the model name and loads the model.

        @raise NotImplementedError: if the subclass has not set
            C{MODEL_CLASS}

        """
        BackoffBuilder.__init__(self, *args, **kwargs)
        # Check the subclass is properly defined
        if type(self).MODEL_CLASS is None:
            # Call form of raise: valid on both Python 2 and Python 3
            raise NotImplementedError(
                "BackoffBuilder "
                "subclass %s does not define a model class"
                % type(self).__name__)
        if self.options['partition'] is not None:
            # A partition was requested: derive the partition's model name
            self.model_name = type(self).partition_model_name(
                                        self.options['model'],
                                        self.options['partition'])
        else:
            self.model_name = self.options['model']
        self.logger.info("Backoff model: %s" % self.model_name)

        # Load a TaggerModel subclass instance to load the trained model data
        self.model = (type(self).MODEL_CLASS).load_model(self.model_name)

    @staticmethod
    def partition_model_name(model_name, partition_number):
        """
        The model name to use when the given partition number is requested.
        The default implementation simply appends the number to the model
        name. Subclasses may override this if they want to do something
        different.

        @type model_name: str
        @param model_name: base name of the model
        @type partition_number: int
        @param partition_number: number of the partition
        @rtype: str
        @return: the model name with the partition number appended

        """
        return "%s%d" % (model_name, partition_number)

183

def merge_repeated_points(path):
    """
    A tonal space path, represented as a list of TonalDenotations,
    gets generated by the models. It may be sensible for a model to
    generate exactly one point per chord, in which case repeated
    points ought to be removed from the path before evaluating.

    This collapses each run of consecutive repeated points into a single
    point and returns the result. Two points count as repeated when they
    have the same C{root_number} and the same C{function}.

    @type path: list
    @param path: sequence of points, each with C{root_number} and
        C{function} attributes. May be empty.
    @rtype: list
    @return: new list containing the first point of every run of
        repeated points, in the original order. The input is not modified.

    """
    new_path = []
    for point in path:
        # Compare with the previous distinct point, i.e. the last one kept.
        # Every point dropped since then matched it, so this is equivalent
        # to comparing with the immediately preceding point.
        if not new_path or \
                new_path[-1].root_number != point.root_number or \
                new_path[-1].function != point.function:
            new_path.append(point)
    return new_path

203

Source Code for Module jazzparser.backoff.base