Package jazzparser :: Package data :: Module parsing
[hide private]
[frames | no frames]

Source Code for Module jazzparser.data.parsing

  1  """Some data utilities relating to parsing annotated data to evaluate a model. 
  2   
  3  Tools for running the parser on input from the database to test the  
  4  data. 
  5  Note that these should be used on the database mirrors (see  
  6  jazzparser.data.db_mirrors) so that they can be run independently of  
  7  the database itself. 
  8   
  9  This module now provides some utilities for the parsing routines. The  
 10  actual evaluation routines are in jazzparser.evaluation.parsing. 
 11   
 12  """ 
 13  """ 
 14  ============================== License ======================================== 
 15   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
 16    
 17   This file is part of The Jazz Parser. 
 18    
 19   The Jazz Parser is free software: you can redistribute it and/or modify 
 20   it under the terms of the GNU General Public License as published by 
 21   the Free Software Foundation, either version 3 of the License, or 
 22   (at your option) any later version. 
 23    
 24   The Jazz Parser is distributed in the hope that it will be useful, 
 25   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 26   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 27   GNU General Public License for more details. 
 28    
 29   You should have received a copy of the GNU General Public License 
 30   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
 31   
 32  ============================ End license ====================================== 
 33   
 34  """ 
 35  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"  
 36   
 37  import sys 
 38   
 39  from jazzparser.parsers import ParseError 
 40  from jazzparser.grammar import get_grammar 
def keys_for_sequence(sequence, grammar=None):
    """
    Takes a chord sequence from the chord corpus and parses using its
    annotations. Returns a list of C{(chord, key)} pairs, one for each
    chord in C{sequence.chords}, in order. The key is whatever value the
    formalism's C{semantics_to_keys} reports (intended to be a pitch class
    integer).

    This is simply worked out, once the parse is done. Every chord in a
    cadence has the same key as the resolution of the cadence, which can be
    read off by taking the equal temperament pitch class for the tonal space
    point of the resolution.

    A chord's start time is the sum of the durations of the chords before
    it. Each chord is paired with the key that is current at its start
    time. At most one key change is applied per chord, so a key change that
    falls inside a chord takes effect from the following chord.

    @param sequence: annotated chord sequence. Must provide C{chords}
        (each chord having a C{duration}) and C{string_name}.
    @param grammar: grammar to parse with. If C{None}, the default grammar
        is loaded using C{get_grammar()}.
    @rtype: list of (chord, key) pairs
    @return: every chord of the sequence paired with its key
    @raise ParseError: if parsing the annotations does not produce exactly
        one (full) parse

    """
    # Imported here rather than at module level, as in the rest of this module
    from jazzparser.evaluation.parsing import parse_sequence_with_annotations

    if grammar is None:
        grammar = get_grammar()
    # Try parsing the sequence according to the tree in the database
    sub_parses = parse_sequence_with_annotations(sequence, grammar)
    if len(sub_parses) != 1:
        # We can only continue if we got a full parse: several sub-parses
        # mean the parse was partial, none means there is nothing to read
        raise ParseError("could not fully parse the sequence %s." %
                         sequence.string_name)
    sems = sub_parses[0].semantics

    # Get the keys for this LF, and the times when they start
    keys = grammar.formalism.semantics_to_keys(sems)
    key_roots, change_times = zip(*keys)
    num_keys = len(key_roots)

    # Index of the key currently in force. The first key applies from the
    # start: its own start time is ignored, as it should be 0
    current = 0
    chord_keys = []
    time = 0
    for chord in sequence.chords:
        # Move on to the next key once its start time has been reached.
        # When there are no more key changes, the last key is simply kept
        # for all the remaining chords
        if current + 1 < num_keys and time >= change_times[current + 1]:
            current += 1
        # Add the next chord with the current key value
        chord_keys.append((chord, key_roots[current]))
        time += chord.duration

    return chord_keys
97
class ParseResults(object):
    """
    A simple wrapper object to store the results of a parse, plus the
    gold standard result, so that they can easily be dumped out to
    a file using pickle.

    The gold parse may be omitted if it's not available. Alternatively,
    you may store an annotated sequence as your gold standard: this
    should go in C{gold_sequence}. You may, of course, store both.

    @note: this used to store a list of logical forms. Now it can store signs
    as well: in this case, the logical form can be found in C{sign.semantics}
    for any sign and C{signs} should be C{True}.

    """
    def __init__(self, parses, gold_parse=None, signs=False,
                 gold_sequence=None, timed_out=None, cpu_time=None):
        self.parses = parses
        """
        List of (probability,interpretation) tuples, where the
        interpretation is a sign parse result, or a logical form. Which is used
        should be reflected in C{signs}.
        """
        self.gold_parse = gold_parse
        """The interpretation (tonal space semantics) given by the gold standard."""
        self.signs = signs
        """True if the stored parses are signs and not logical forms."""
        self.gold_sequence = gold_sequence
        """Gold standard interpretation in the form of an annotated chord sequence."""
        self.timed_out = timed_out
        """True if the parse timed out (might still have results from a backoff model)."""
        self.cpu_time = cpu_time
        """Time taken for the parse, measured in CPU time (not wall clock)."""

    def __get_sorted_results(self):
        """
        The list of results (TS interpretations or signs) ordered by
        descending probability.

        """
        # Sort ascending by probability, then reverse. Note that this also
        # reverses the relative order of results with equal probability
        ranked = sorted(self.parses, key=lambda p: p[0])
        ranked.reverse()
        return ranked
    sorted_results = property(__get_sorted_results)

    def __get_semantics(self):
        """
        Always returns a list of (probability,semantics) pairs, whether or
        not the results were stored as signs. The results are sorted by
        descending probability.

        """
        # The attribute is looked up defensively: an instance might not
        # have C{signs} set at all, in which case the results are treated
        # as logical forms
        if getattr(self, "signs", False):
            lfs = [(prob, res.semantics) for (prob, res) in self.parses]
        else:
            lfs = self.parses
        ranked = sorted(lfs, key=lambda p: p[0])
        ranked.reverse()
        return ranked
    semantics = property(__get_semantics)

    def get_gold_semantics(self):
        """
        Tries to return a gold standard semantics. In some cases this is
        stored along with the results in C{gold_parse}. In others this is
        not available, but a gold annotated chord sequence is: then we
        can get the gold semantics by parsing the annotations. Note that
        this might take a little bit of time.

        In other cases neither is available. Then C{None} will be returned.

        @return: the gold standard semantics, or C{None} if it is not
            stored and cannot be obtained by parsing C{gold_sequence}

        """
        if self.gold_parse is not None:
            return self.gold_parse
        if self.gold_sequence is None:
            return None

        # Only import the evaluation tools once we know we need to parse
        from jazzparser.evaluation.parsing import parse_sequence_with_annotations
        # Parse the annotations to get a semantics
        try:
            gold_parses = parse_sequence_with_annotations(
                                            self.gold_sequence,
                                            grammar=get_grammar(),
                                            allow_subparses=False)
            if len(gold_parses) != 1:
                # This shouldn't happen, since allow_subparses was False
                return None
            # Got a result: return its semantics
            return gold_parses[0].semantics
        except ParseError:
            # Could not parse annotated sequence
            return None

    def get_top_result(self):
        """
        Loads the top parse result and the gold standard result.
        Both are None if there are no results.

        @rtype: pair
        @return: top parser semantics and gold standard semantics

        """
        if not self.parses:
            return None, None
        gold = self.get_gold_semantics()
        # The semantics are sorted by descending probability: take the
        # semantics (not the probability) of the first
        top_res = self.semantics[0][1]
        return top_res, gold

    def get_name(self):
        """
        Returns a name for the input if one is available, otherwise None.

        The name is the C{string_name} of the gold standard sequence.

        """
        if self.gold_sequence is not None:
            return self.gold_sequence.string_name
        return None

    def save(self, filename):
        """
        Pickles this object to the file C{filename}, overwriting it.

        @type filename: string
        @param filename: path of the file to write

        """
        try:
            import cPickle as pickle
        except ImportError:
            # No cPickle available (e.g. Python 3): use the plain module
            import pickle
        # Protocol -1 selects the highest protocol, which is a binary
        # format, so the file must be opened in binary mode
        with open(filename, 'wb') as outfile:
            pickle.dump(self, outfile, -1)

    @staticmethod
    def from_file(filename):
        """
        Loads an object previously stored using L{save}.

        @note: unpickling can execute arbitrary code. Only load files
            that come from a trusted source.

        @type filename: string
        @param filename: path of the file to read
        @return: the unpickled object
        @raise ParseResults.LoadError: if the file's contents cannot be
            unpickled. Errors in opening or reading the file itself are
            not wrapped.

        """
        try:
            import cPickle as pickle
        except ImportError:
            # No cPickle available (e.g. Python 3): use the plain module
            import pickle
        # Pickled data is binary: read it in binary mode
        with open(filename, 'rb') as infile:
            data = infile.read()
        try:
            obj = pickle.loads(data)
        except Exception as err:
            # It would be nice to except a specific exception, but
            # unfortunately unpickling can raise pretty much anything
            raise ParseResults.LoadError("could not read parse results "
                "from %s. %s: %s" % (filename, type(err).__name__, err))
        return obj

    class LoadError(Exception):
        """Raised when stored parse results cannot be unpickled."""
        pass
236