Package jazzparser :: Package backoff :: Package ngram :: Module hmmpath
[hide private]
[frames] | no frames]

Source Code for Module jazzparser.backoff.ngram.hmmpath

  1  """Ngram direct semantics models. 
  2   
  3  Here I use ngram models to assign a tonal-space semantics to an input  
  4  sequence. 
  5  This is the model referred to as I{HmmPath} in papers and talks. 
  6   
  7  """ 
  8  """ 
  9  ============================== License ======================================== 
 10   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
 11    
 12   This file is part of The Jazz Parser. 
 13    
 14   The Jazz Parser is free software: you can redistribute it and/or modify 
 15   it under the terms of the GNU General Public License as published by 
 16   the Free Software Foundation, either version 3 of the License, or 
 17   (at your option) any later version. 
 18    
 19   The Jazz Parser is distributed in the hope that it will be useful, 
 20   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 22   GNU General Public License for more details. 
 23    
 24   You should have received a copy of the GNU General Public License 
 25   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
 26   
 27  ============================ End license ====================================== 
 28   
 29  """ 
 30  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"  
 31   
 32  from StringIO import StringIO 
 33  from nltk.probability import ConditionalProbDist 
 34   
 35  from jazzparser.utils.nltk.ngram import NgramModel 
 36  from jazzparser.utils.nltk.probability import ESTIMATORS, laplace_estimator, \ 
 37                              get_estimator_name, CutoffConditionalFreqDist, \ 
 38                              logprob, sum_logs 
 39  from jazzparser.utils.base import group_pairs 
 40  from jazzparser.utils.options import ModuleOption, ModuleOptionError, \ 
 41                              choose_from_dict 
 42  from jazzparser.utils.tonalspace import coordinate_to_et_2d 
 43  from jazzparser.utils.loggers import create_dummy_logger 
 44   
 45  from ..base import ModelBackoffBuilder, BackoffModel, \ 
 46                  merge_repeated_points 
 47  from jazzparser.grammar import get_grammar 
 48  from jazzparser.evaluation.parsing import parse_sequence_with_annotations 
 49  from jazzparser.parsers import ParseError 
 50  from jazzparser.taggers import process_chord_input 
 51  from jazzparser.taggers.chordmap import get_chord_mapping, \ 
 52                              get_chord_mapping_module_option 
 53  from jazzparser.taggers.ngram_multi.model import lattice_to_emissions 
 54   
 55  from jazzparser.data.input import DbBulkInput, AnnotatedDbBulkInput, \ 
 56                              ChordInput, WeightedChordLabelInput, DbInput 
 57  from jazzparser.data import Chord 
 58  from jazzparser.data.db_mirrors import Chord as DbChord 
 59   
 60  # We shouldn't really use this formalism-specific data structure here,  
 61  #  but it includes the computation we need to do, so I'm being naughty 
 62  from jazzparser.formalisms.music_halfspan.semantics import EnharmonicCoordinate 
63 64 -def vector(p0, p1):
65 """ 66 Vector from p0 to p1, where the ps are points represented as they are 67 internally in the model: (X,Y,x,y). (x,y) defines the point in the local 68 (enharmonic) space and is that closest to the previous point when 69 (X,Y) = (0,0). (X,Y) defines a shift of enharmonic space. 70 71 """ 72 # We don't care about X0 and Y0 73 X0,Y0,x0,y0 = p0 74 X1,Y1,x1,y1 = p1 75 # Get the basic vector, assuming (X1,Y1)=(0,0) 76 nearest = EnharmonicCoordinate((x0,y0)).nearest((x1,y1)) 77 # Shift this according to X1 and Y1 78 nearest.X += X1 79 nearest.Y += Y1 80 newx, newy = nearest.harmonic_coord 81 return (newx-x0, newy-y0)
82
83 -class HmmPathNgram(NgramModel):
84 """ 85 An ngram model that takes multiple chords (weighted by probability) as 86 input to its decoding. It is trained on labeled data. 87 88 This is similar to 89 L{jazzparser.taggers.ngram_multi.model.MultiChordNgramModel}, but the 90 states represent points on a TS path, rather than categories. 91 92 """
93 - def __init__(self, order, point_transition_counts, fn_transition_counts, 94 type_emission_counts, subst_emission_counts, 95 estimator, backoff_model, chord_map, vector_dom, 96 point_dom, history=""):
97 self.order = order 98 self.backoff_model = backoff_model 99 100 chord_vocab = list(set(chord_map.keys())) 101 self.chord_vocab = chord_vocab 102 internal_chord_vocab = list(set(chord_map.values())) 103 self.chord_map = chord_map 104 self._estimator = estimator 105 106 # Construct the domains by combining possible roots with 107 # the other components of the labels 108 self.vector_dom = vector_dom 109 self.point_dom = point_dom 110 self.label_dom = [(point,function) for point in point_dom \ 111 for function in ["T","D","S"] ] 112 self.num_labels = len(self.label_dom) 113 self.emission_dom = [(root,label) for root in range(12) \ 114 for label in internal_chord_vocab] 115 self.num_emissions = len(self.emission_dom) 116 117 # Keep hold of the freq dists 118 self.point_transition_counts = point_transition_counts 119 self.fn_transition_counts = fn_transition_counts 120 self.type_emission_counts = type_emission_counts 121 self.subst_emission_counts = subst_emission_counts 122 # Make some prob dists 123 self.point_transition_dist = ConditionalProbDist( 124 point_transition_counts, estimator, len(vector_dom)) 125 self.fn_transition_dist = ConditionalProbDist( 126 fn_transition_counts, estimator, 4) # Includes final state 127 self.type_emission_dist = ConditionalProbDist( 128 type_emission_counts, estimator, len(internal_chord_vocab)) 129 self.subst_emission_dist = ConditionalProbDist( 130 subst_emission_counts, estimator, 12) 131 132 # Store a string with information about training, etc 133 self.history = history 134 # Initialize the various caches 135 # These will be filled as we access probabilities 136 self.clear_cache()
137
138 - def add_history(self, string):
139 """ Adds a line to the end of this model's history string. """ 140 self.history += "%s: %s\n" % (datetime.now().isoformat(' '), string)
141 142 @staticmethod
143 - def train(data, estimator, grammar, cutoff=0, logger=None, 144 chord_map=None, order=2, backoff_orders=0, backoff_kwargs={}):
145 """ 146 Initializes and trains an HMM in a supervised fashion using the given 147 training data. Training data should be chord sequence data (input 148 type C{bulk-db} or C{bulk-db-annotated}). 149 150 """ 151 # Prepare a dummy logger if none was given 152 if logger is None: 153 logger = create_dummy_logger() 154 logger.info(">>> Beginning training of ngram backoff model") 155 156 training_data = [] 157 # Generate the gold standard data by parsing the annotations 158 for dbinput in data: 159 # Get a gold standard tonal space sequence 160 try: 161 parses = parse_sequence_with_annotations(dbinput, grammar, \ 162 allow_subparses=False) 163 except ParseError, err: 164 # Just skip this sequence 165 logger.error('Could not get a GS parse of %s: %s' % (dbinput,err)) 166 continue 167 # There should only be one of these now 168 parse = parses[0] 169 if parse is None: 170 logger.error('Could not get a GS parse of %s' % (dbinput)) 171 continue 172 173 # Get the form of the analysis we need for the training 174 if chord_map is None: 175 chords = [(c.root, c.type) for c in dbinput.chords] 176 else: 177 chords = [(c.root, chord_map[c.type]) for c in dbinput.chords] 178 179 points,times = zip(*grammar.formalism.semantics_to_coordinates( 180 parse.semantics)) 181 # Run through the sequence, transforming absolute points into 182 # the condensed relative representation 183 ec0 = EnharmonicCoordinate.from_harmonic_coord(points[0]) 184 # The first point is relative to the origin and always in the 185 # (0,0) enharmonic space 186 rel_points = [(0,0,ec0.x,ec0.y)] 187 for point in points[1:]: 188 ec1 = EnharmonicCoordinate.from_harmonic_coord(point) 189 # Find the nearest enharmonic instance of this point to the last 190 nearest = ec0.nearest((ec1.x, ec1.y)) 191 # Work out how much we have to shift this by to get the point 192 dX = ec1.X - nearest.X 193 dY = ec1.Y - nearest.Y 194 rel_points.append((dX,dY,ec1.x,ec1.y)) 195 ec0 = ec1 196 funs,times = zip(*grammar.formalism.semantics_to_functions( 197 parse.semantics)) 198 199 ### Synchronize the chords with the points and functions 200 # We may need to repeat chords to match up with analysis 201 # points that span multiple chords 202 analysis = iter(zip(rel_points,funs,times)) 203 rel_point, fun, __ = analysis.next() 204 next_rel_point,next_fun,next_anal_time = analysis.next() 205 # Keep track of how much time has elapsed 206 time = 0 207 training_seq = [] 208 reached_end = False 209 for crd_pair,chord in zip(chords, dbinput.chords): 210 if time >= next_anal_time and not reached_end: 211 # Move on to the next analysis point 212 rel_point, fun = next_rel_point, next_fun 213 try: 214 next_rel_point,next_fun,next_anal_time = analysis.next() 215 except StopIteration: 216 # No more points: keep using the same to the end 217 reached_end = True 218 training_seq.append((crd_pair, (rel_point,fun))) 219 time += chord.duration 220 training_data.append(training_seq) 221 222 # Create some empty freq dists 223 subst_emission_counts = CutoffConditionalFreqDist(cutoff=cutoff) 224 type_emission_counts = CutoffConditionalFreqDist(cutoff=cutoff) 225 point_transition_counts = CutoffConditionalFreqDist(cutoff=cutoff) 226 fn_transition_counts = CutoffConditionalFreqDist(cutoff=cutoff) 227 228 seen_vectors = [] 229 seen_points = set() 230 seen_XY = set() 231 232 # Count all the stats from the training data 233 for seq in training_data: 234 # Keep track of the necessary history context 235 history = [] 236 for (chord, (point,fun)) in seq: 237 ### Counts for the emission distribution 238 chord_root,label = chord 239 # Work out the chord substitution 240 X,Y,x,y = point 241 subst = (chord_root - coordinate_to_et_2d((x,y))) % 12 242 # Increment the counts 243 subst_emission_counts[fun].inc(subst) 244 type_emission_counts[(subst,fun)].inc(label) 245 246 seen_points.add(point) 247 seen_XY.add((X,Y)) 248 249 if order > 1: 250 ### Counts for the transition distribution 251 # Update the history fifo 252 history = [(point,fun)] + history[:order-1] 253 if len(history) > 1: 254 points,functions = zip(*history) 255 #~ # Get the vectors between the points 256 #~ vectors = [vector(p0,p1) for (p1,p0) in \ 257 #~ group_pairs([p for p in points if p is not None])] 258 # The function is conditioned on all previous functions 259 fn_context = tuple(functions[1:]) 260 fn_transition_counts[fn_context].inc(functions[0]) 261 262 #~ # The vector is conditioned on the function 263 #~ # and the pairs of vector and function preceding that 264 #~ point_context = tuple([functions[:2]] + 265 #~ list(zip(vectors[1:], functions[2:]))) 266 # The vector is conditioned on the function 267 #~ point_transition_counts[point_context].inc(vectors[0]) 268 vect = vector(points[1], points[0]) 269 point_transition_counts[functions[0]].inc(vect) 270 271 # Keep track of what vectors we've observed 272 #~ seen_vectors.append(vectors[0]) 273 seen_vectors.append(vect) 274 else: 275 # For the first point, we only count the function prob 276 fn_transition_counts[tuple()].inc(fun) 277 278 if order > 1: 279 # Count the transition to the final state 280 history = history[:order-1] 281 points,functions = zip(*history) 282 fn_context = tuple(functions) 283 fn_transition_counts[fn_context].inc(None) 284 285 # Labels are (X,Y,x,y). We want all the points seen in the data 286 # and all (0,0,x,y) 287 for X,Y in seen_XY: 288 for x in range(4): 289 for y in range(3): 290 seen_points.add((X,Y,x,y)) 291 point_dom = list(seen_points) 292 293 if backoff_orders > 0: 294 # Default to using the same params for the backoff model 295 kwargs = { 296 'cutoff' : cutoff, 297 'chord_map' : chord_map, 298 } 299 kwargs.update(backoff_kwargs) 300 301 logger.info("Training backoff model") 302 # Train a backoff model 303 backoff = HmmPathNgram.train(data, estimator, grammar, logger=logger, 304 order=order-1, backoff_orders=backoff_orders-1, 305 backoff_kwargs=backoff_kwargs, **kwargs) 306 else: 307 backoff = None 308 309 # Get a list of every vector in the training set 310 vector_dom = list(set(seen_vectors)) 311 312 return HmmPathNgram(order, 313 point_transition_counts, 314 fn_transition_counts, 315 type_emission_counts, 316 subst_emission_counts, 317 estimator, 318 backoff, 319 chord_map, 320 vector_dom, 321 point_dom)
322 323 ################## Probabilities ###################
324 - def transition_log_probability(self, *states):
325 states = [s if s is not None else (None,None) for s in states] 326 327 if self.order == 1: 328 # Transitions are all equiprobable 329 return - logprob(len(self.label_dom)) 330 331 points,functions = zip(*states) 332 333 if points[0] is None: 334 # Just use the fun transition to get prob of final state 335 fn_context = tuple(functions[:1]) 336 return self.fn_transition_dist[fn_context].logprob(None) 337 338 if all(p is None for p in points[1:]) == 1: 339 # Initial states: all points equiprobable 340 # Only permit points in the (0,0) enharmonic space 341 if points[0][0] != 0 or points[0][1] != 0: 342 return float('-inf') 343 # Get fn prob from initial dist 344 return self.fn_transition_dist[tuple()].logprob(functions[0]) - logprob(12) 345 346 # The function is conditioned on all previous functions 347 fn_context = tuple(functions[1:]) 348 fn_prob = self.fn_transition_dist[fn_context].logprob(functions[0]) 349 350 vect = vector(points[1], points[0]) 351 # The vector is conditioned on the function 352 # and the pairs of vector and function preceding that 353 vector_prob = self.point_transition_dist[functions[0]].logprob(vect) 354 355 # Multiply together the vector and function probs 356 return vector_prob + fn_prob
357
358 - def emission_log_probability(self, emission, state):
359 """ 360 Gives the probability P(emission | label). Returned as a base 2 361 log. 362 363 The emission should be a pair of (root,label), together defining a 364 chord. 365 366 There's a special case of this. If the emission is a list, it's 367 assumed to be a I{distribution} over emissions. The list should 368 contain (prob,em) pairs, where I{em} is an emission, such as is 369 normally passed into this function, and I{prob} is the weight to 370 give to this possible emission. The probabilities of the possible 371 emissions are summed up, weighted by the I{prob} values. 372 373 """ 374 if type(emission) is list: 375 # Average probability over the possible emissions 376 probs = [] 377 for (prob,em) in emission: 378 probs.append(logprob(prob) + \ 379 self.emission_log_probability(em, state)) 380 return sum_logs(probs) 381 382 # Single chord label 383 point,function = state 384 chord_root,label = emission 385 X,Y,x,y = point 386 # Work out the chord substitution 387 subst = (chord_root - coordinate_to_et_2d((x,y))) % 12 388 389 # Generate the substitution given the chord function 390 subst_prob = self.subst_emission_dist[function].logprob(subst) 391 # Generate the label given the subst and chord function 392 label_prob = self.type_emission_dist[(subst,function)].logprob(label) 393 return subst_prob + label_prob
394 395 ################## Storage ####################
396 - def to_picklable_dict(self):
397 from jazzparser.utils.nltk.storage import object_to_dict 398 399 if self.backoff_model is not None: 400 backoff_model = self.backoff_model.to_picklable_dict() 401 else: 402 backoff_model = None 403 404 return { 405 'order' : self.order, 406 'point_transition_counts' : object_to_dict(self.point_transition_counts), 407 'fn_transition_counts' : object_to_dict(self.fn_transition_counts), 408 'type_emission_counts' : object_to_dict(self.type_emission_counts), 409 'subst_emission_counts' : object_to_dict(self.subst_emission_counts), 410 'estimator' : self._estimator, 411 'backoff_model' : backoff_model, 412 'chord_map' : self.chord_map.name, 413 'vector_dom' : self.vector_dom, 414 'point_dom' : self.point_dom, 415 'history' : self.history, 416 }
417 418 @classmethod
419 - def from_picklable_dict(cls, data):
420 from jazzparser.utils.nltk.storage import dict_to_object 421 422 if data['backoff_model'] is not None: 423 backoff_model = cls.from_picklable_dict(data['backoff_model']) 424 else: 425 backoff_model = None 426 427 return cls(data['order'], 428 dict_to_object(data['point_transition_counts']), 429 dict_to_object(data['fn_transition_counts']), 430 dict_to_object(data['type_emission_counts']), 431 dict_to_object(data['subst_emission_counts']), 432 data['estimator'], 433 backoff_model, 434 get_chord_mapping(data['chord_map']), 435 data['vector_dom'], 436 data['point_dom'], 437 history=data.get('history', ''))
438
439 440 -class HmmPathModel(BackoffModel):
441 """ 442 Model type that uses an ngram model to assign a tonal space 443 path to a sequence. This class provides the interface to the model 444 training and decoding. The details are all implemented in the 445 model class above. 446 447 """ 448 MODEL_TYPE = "ngram" 449 # Set up possible options for training 450 TRAINING_OPTIONS = [ 451 ModuleOption('n', filter=int, 452 help_text="Length of the n-grams which this model will use.", 453 usage="n=N, where N is an integer. Defaults to bigrams", default=2), 454 ModuleOption('backoff', filter=int, 455 help_text="Number of orders of backoff to use. This must be "\ 456 "less than n. E.g. if using a trigram model (n=3) you can "\ 457 "set backoff=2 to back off to bigrams and from bigrams "\ 458 "to unigrams. Set to 0 to use no backoff at all (default).", 459 usage="backoff=X, where X is an integer < n", default=0), 460 ModuleOption('cutoff', filter=int, 461 help_text="In estimating probabilities, treat any counts below "\ 462 "cutoff as zero", 463 usage="cutoff=X, where X is an integer", default=0), 464 ModuleOption('estimator', filter=choose_from_dict(ESTIMATORS), 465 help_text="A way of constructing a probability model given "\ 466 "the set of counts from the data. Default is to use "\ 467 "laplace (add-one) smoothing.", 468 usage="estimator=X, where X is one of: %s" % ", ".join(ESTIMATORS.keys()), 469 default=laplace_estimator), 470 get_chord_mapping_module_option(), 471 ] + BackoffModel.TRAINING_OPTIONS 472
473 - def __init__(self, model_name, model=None, grammar=None, *args, **kwargs):
474 super(HmmPathModel, self).__init__(model_name, *args, **kwargs) 475 self.model = model 476 self.grammar = grammar 477 478 if self.options['n'] <= self.options['backoff']: 479 # This is not allowed 480 # We can only back off n-1 orders for an n-gram model 481 raise TaggingModelError, "tried to load an n-gram model with "\ 482 "more orders of backoff than are possible (backing off "\ 483 "%d orders on a %d-gram model)" % \ 484 (self.options['backoff'], self.options['n'])
485
486 - def train(self, data, grammar=None, logger=None):
487 if grammar is None: 488 from jazzparser.grammar import get_grammar 489 # Load the default grammar 490 grammar = get_grammar() 491 492 model = HmmPathNgram.train(data, self.options['estimator'], grammar, 493 cutoff=self.options['cutoff'], 494 chord_map=self.options['chord_mapping'], 495 order=self.options['n'], 496 backoff_orders=self.options['backoff']) 497 self.model = model 498 499 # Add some model-specific info into the descriptive text 500 # so we know how it was trained 501 est_name = get_estimator_name(self.options['estimator']) 502 self.model_description = """\ 503 Model order: %(order)d 504 Backoff orders: %(backoff)d 505 Probability estimator: %(est)s 506 Zero-count threshold: %(cutoff)d 507 Training sequences: %(seqs)d 508 Training samples: %(samples)d\ 509 """ % \ 510 { 511 'est' : est_name, 512 'seqs' : len(data), 513 'samples' : sum([len(s) for s in data], 0), 514 'order' : self.options['n'], 515 'backoff' : self.options['backoff'], 516 'cutoff' : self.options['cutoff'], 517 }
518 519 @staticmethod
520 - def _load_model(data):
521 model = HmmPathNgram.from_picklable_dict(data['model']) 522 name = data['name'] 523 return HmmPathModel(name, model=model)
524
525 - def _get_model_data(self):
526 data = { 527 'name' : self.model_name, 528 'model' : self.model.to_picklable_dict() 529 } 530 return data
531
532 - def forward_probabilities(self, sequence):
533 """ Interface to the NgramModel's forward_probabilities """ 534 return self.model.forward_probabilities(sequence)
535
536 - def forward_backward_probabilities(self, sequence):
537 return self.model.normal_forward_backward_probabilities(sequence)
538
539 - def viterbi_probabilities(self, sequence):
540 return self.model.viterbi_selector_probabilities(sequence)
541
542 - def viterbi_paths(self, sequence, paths=None):
543 return self.model.generalized_viterbi(sequence, N=paths)
544
545 - def _get_labels(self):
546 return self.model.label_dom
547 labels = property(_get_labels) 548 549 ############ Parameter output ###########
550 - def _get_readable_parameters(self):
551 """ Produce a human-readable repr of the params of the model """ 552 buff = StringIO() 553 model = self.model 554 555 try: 556 # Include the stored model description 557 print >>buff, self.model_description 558 print >>buff, "\nNum emissions: %d" % model.num_emissions 559 560 print >>buff, "\nChord mapping: %s:" % model.chord_map.name 561 for (crdin, crdout) in model.chord_map.items(): 562 print >>buff, " %s -> %s" % (crdin, crdout) 563 564 print >>buff, "\nPoint transition dist" 565 for cond in sorted(model.point_transition_dist.conditions()): 566 print >>buff, " %s" % str(cond) 567 for prob,samp in reversed(sorted(\ 568 (model.point_transition_dist[cond].prob(interval), 569 interval) for \ 570 interval in model.point_transition_dist[cond].samples())): 571 print >>buff, " %s: %s " % (samp, prob) 572 print >>buff 573 574 print >>buff, "Function transition dist" 575 for context in sorted(model.fn_transition_dist.conditions()): 576 print >>buff, " %s" % ",".join([str(s) for s in context]) 577 for prob,samp in reversed(sorted(\ 578 (model.fn_transition_dist[context].prob(samp), 579 samp) for \ 580 samp in model.fn_transition_dist[context].samples())): 581 print >>buff, " %s: %s " % (samp, prob) 582 print >>buff 583 584 print >>buff, "Substition emission dist" 585 for state in sorted(model.subst_emission_dist.conditions()): 586 print >>buff, " %s" % state 587 for prob,subst in reversed(sorted(\ 588 (model.subst_emission_dist[state].prob(subst), 589 subst) for \ 590 subst in model.subst_emission_dist[state].samples())): 591 print >>buff, " %s: %s " % (subst, prob) 592 593 print >>buff, "Chord type emission dist" 594 for state in sorted(model.type_emission_dist.conditions()): 595 print >>buff, " %s" % str(state) 596 for prob,chord in reversed(sorted(\ 597 (model.type_emission_dist[state].prob(chord), 598 chord) for \ 599 chord in self.model.type_emission_dist[state].samples())): 600 print >>buff, " %s: %s " % (chord, prob) 601 except AttributeError, err: 602 # Catch this, because otherwise it just looks like the attribute 603 # (readable_parameters) doesn't exist (stupid Python behaviour) 604 raise ValueError, "error generating model description "\ 605 "(attribute error): %s" % err 606 607 return buff.getvalue()
608 readable_parameters = property(_get_readable_parameters)
609
610 -class HmmPathBuilder(ModelBackoffBuilder):
611 """ 612 Builds a semantics using an ngram model. Can take input from chord 613 sequences or a weighted lattice of chords. 614 615 """ 616 MODEL_CLASS = HmmPathModel 617 BUILDER_OPTIONS = ModelBackoffBuilder.BUILDER_OPTIONS + [ 618 ModuleOption('paths', filter=int, 619 help_text="Number of paths to suggest.", 620 usage="paths=X, where X is an integer", 621 default=10), 622 ] 623 INPUT_TYPES = ['db', 'chords', 'labels'] 624
625 - def __init__(self, input, options={}, grammar=None, *args, **kwargs):
626 super(HmmPathBuilder, self).__init__(input, options, *args, **kwargs) 627 process_chord_input(self) 628 629 if grammar is None: 630 self.grammar = get_grammar() 631 else: 632 self.grammar = grammar 633 634 #### Tag the input sequence #### 635 self._tagged_data = [] 636 637 chord_map = self.model.model.chord_map 638 if isinstance(self.wrapped_input, ChordInput): 639 chords = self.wrapped_input.to_db_input().chords 640 observations = [(chord.root, chord_map[chord.type]) for chord in 641 chords] 642 self.input = chords 643 elif isinstance(self.wrapped_input, DbInput): 644 observations = [(chord.root, chord_map[chord.type]) for chord in 645 self.wrapped_input.chords] 646 elif isinstance(self.wrapped_input, WeightedChordLabelInput): 647 observations = lattice_to_emissions(input, chord_map=chord_map) 648 649 # Use the ngram model to get tag probabilities for each input by 650 # computing the state occupation probability matrix 651 path_probs = self.model.viterbi_paths(observations, self.options['paths']) 652 653 self._paths = [ 654 self.grammar.formalism.backoff_states_to_lf(zip(states,self.times)) 655 for states,prob in path_probs] 656 # Set the probability on each result 657 for path,(states,prob) in zip(self._paths,path_probs): 658 path.probability = prob
659 660 @property
661 - def num_paths(self):
662 return len(self._paths)
663
664 - def get_tonal_space_path(self, rank=0):
665 if rank >= len(self._paths): 666 return None 667 else: 668 return self._paths[rank]
669