1 """Some data utilities relating to parsing annotated data to evaluate model.
2
3 Tools for running the parser on input from the database to test the
4 data.
5 Note that these should be used on the database mirrors (see
6 jazzparser.data.db_mirrors) so that they can be run independently of
7 the database itself.
8
9 This module now provides some utilities for the parsing routines. The
10 actual evaluation routines are in jazzparser.evaluation.parsing.
11
12 """
13 """
14 ============================== License ========================================
15 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding
16
17 This file is part of The Jazz Parser.
18
19 The Jazz Parser is free software: you can redistribute it and/or modify
20 it under the terms of the GNU General Public License as published by
21 the Free Software Foundation, either version 3 of the License, or
22 (at your option) any later version.
23
24 The Jazz Parser is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 GNU General Public License for more details.
28
29 You should have received a copy of the GNU General Public License
30 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>.
31
32 ============================ End license ======================================
33
34 """
35 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"
36
37 import sys
38
39 from jazzparser.parsers import ParseError
40 from jazzparser.grammar import get_grammar
43 """
44 Takes a chord sequence from the chord corpus and parses using its
45 annotations. Returns a list of the key (as a pitch class integer) for
46 each chord.
47
48 This is simply worked out, once the parse is done. Every chord in a cadence
49 has the same key as the resolution of the cadence, which can be read off
50 by taking the equal temperament pitch class for the tonal space point of
51 the resolution.
52
53 """
54 from jazzparser.evaluation.parsing import parse_sequence_with_annotations
55 if grammar is None:
56 grammar = get_grammar()
57
58 sub_parses = parse_sequence_with_annotations(sequence, grammar)
59 if len(sub_parses) > 1:
60
61 raise ParseError, "could not fully parse the sequence %s." % \
62 sequence.string_name
63 sems = sub_parses[0].semantics
64
65
66 keys = grammar.formalism.semantics_to_keys(sems)
67 key_roots, change_times = zip(*keys)
68 key_roots = iter(key_roots)
69 change_times = iter(change_times)
70
71 chords = iter(sequence)
72
73 key = key_roots.next()
74
75 change_times.next()
76 chord_keys = []
77 try:
78
79 next_change = change_times.next()
80
81 time = 0
82 for chord in sequence.chords:
83 if time >= next_change:
84
85 key = key_roots.next()
86 next_change = change_times.next()
87
88 chord_keys.append((chord, key))
89 time += chord.duration
90 except StopIteration:
91
92
93 for chord in chords:
94 chord_keys.append((chord, key))
95
96 return chord_keys
97
99 """
100 A simple wrapper object to store the results of a parse, plus the
101 gold standard result, so that they can easily be dumped out to
102 a file using pickle.
103
104 The gold parse may be omitted if it's not available. Alternatively,
105 you may store an annotated sequence as your gold standard: this
106 should go in C{gold_sequence}. You may, of course, store both.
107
108 @note: this used to store a list of logical forms. Now it can store signs
109 as well: in this case, the logical form can be found in C{sign.semantics}
110 for any sign and C{signs} should be C{True}.
111
112 """
def __init__(self, parses, gold_parse=None, signs=False,
             gold_sequence=None, timed_out=None, cpu_time=None):
    """
    Store the results of a parse, optionally with gold standard data.

    Every argument is stored unchanged in the attribute of the same name.

    @type parses: list of (probability,interpretation) tuples
    @param parses: the parse results. Each interpretation is a sign or a
        logical form: which is used should be reflected in C{signs}.
    @param gold_parse: the interpretation (tonal space semantics) given
        by the gold standard, if available
    @type signs: bool
    @param signs: True if the stored parses are signs and not logical
        forms
    @param gold_sequence: gold standard interpretation in the form of an
        annotated chord sequence, if available
    @param timed_out: True if the parse timed out
    @param cpu_time: time taken for the parse, measured in CPU time

    """
    #: List of (probability,interpretation) tuples, where the
    #: interpretation is a sign parse result, or a logical form. Which is
    #: used should be reflected in C{signs}.
    self.parses = parses
    #: The interpretation (tonal space semantics) given by the gold
    #: standard.
    self.gold_parse = gold_parse
    #: True if the stored parses are signs and not logical forms.
    self.signs = signs
    #: Gold standard interpretation in the form of an annotated chord
    #: sequence.
    self.gold_sequence = gold_sequence
    #: True if the parse timed out (might still have results from a
    #: backoff model).
    self.timed_out = timed_out
    #: Time taken for the parse, measured in CPU time (not wall clock).
    self.cpu_time = cpu_time
131
def __get_sorted_results(self):
    """
    The list of results (TS interpretations or signs) ordered by
    descending probability.

    @rtype: list of (probability,result) pairs
    @return: a new list of the entries of C{self.parses}; the stored list
        itself is not modified

    """
    # Sort ascending and then reverse, rather than sorting with
    # reverse=True: the two differ in how entries with equal probability
    # are ordered, and this keeps the ordering this property has always
    # produced (ties come out in the reverse of their stored order).
    ordered = sorted(self.parses, key=lambda p: p[0])
    ordered.reverse()
    return ordered
sorted_results = property(__get_sorted_results)
140
def __get_semantics(self):
    """
    Always returns a list of (probability,semantics) pairs, whether or
    not the results were stored as signs. The results are sorted by
    descending probability.

    @rtype: list of (probability,semantics) pairs
    @return: a new list; C{self.parses} is not modified

    """
    # A missing C{signs} attribute is treated the same as a false one.
    # NOTE(review): presumably this caters for objects pickled before the
    # attribute was introduced -- confirm.
    if getattr(self, "signs", False):
        # Results are signs: pull the logical form out of each one
        lfs = [(prob, res.semantics) for (prob, res) in self.parses]
    else:
        lfs = self.parses
    # Ascending sort followed by a reversal: entries with equal
    # probability end up in the reverse of their stored order
    return list(reversed(sorted(lfs, key=lambda p: p[0])))
semantics = property(__get_semantics)
154
def get_gold_semantics(self):
    """
    Tries to return a gold standard semantics. In some cases this is
    stored along with the results in C{gold_parse}. In others this is
    not available, but a gold annotated chord sequence is: then we
    can get the gold semantics by parsing the annotations. Note that
    this might take a little bit of time.

    In other cases neither is available. Then C{None} will be returned.
    C{None} is also returned if the annotated sequence cannot be parsed
    to exactly one result.

    @return: the gold standard semantics, or C{None} if it can't be
        obtained

    """
    if self.gold_parse is not None:
        # The semantics was stored directly: nothing to compute
        return self.gold_parse
    if self.gold_sequence is None:
        # Neither form of gold standard is available
        return None

    # Only import the annotation parser once we know we need it, so that
    # results with a stored gold parse don't depend on it.
    # NOTE(review): the import is local presumably to avoid a circular
    # import with jazzparser.evaluation.parsing -- confirm.
    from jazzparser.evaluation.parsing import parse_sequence_with_annotations

    # Parse the annotations to get the gold semantics
    try:
        gold_parses = parse_sequence_with_annotations(
                                    self.gold_sequence,
                                    grammar=get_grammar(),
                                    allow_subparses=False)
        if len(gold_parses) != 1:
            # Anything other than a single full parse is no use as a
            # gold standard
            return None
        return gold_parses[0].semantics
    except ParseError:
        # The annotations couldn't be parsed
        return None
187
189 """
190 Loads the top parse result and the gold standard result.
191 Both are None if there are no results.
192
193 @rtype: pair
194 @return: top parser semantics and gold standard semantics
195
196 """
197 if len(self.parses) == 0:
198 return None,None
199 gold = self.get_gold_semantics()
200 top_res = self.semantics[0][1]
201 return top_res,gold
202
204 """
205 Returns a name for the input if one is available, otherwise None.
206
207 """
208 if self.gold_sequence is not None:
209 return self.gold_sequence.string_name
210 return
211
def save(self, filename):
    """
    Pickle this object to a file.

    The highest available pickle protocol is used. That is a binary
    format, so the file is opened in binary mode.

    @type filename: str
    @param filename: path of the file to write. Any existing file is
        overwritten.

    """
    try:
        import cPickle as pickle
    except ImportError:
        # No cPickle module available: use the plain pickle module
        import pickle

    out_file = open(filename, 'wb')
    try:
        # Protocol -1 selects the highest protocol available
        pickle.dump(self, out_file, -1)
    finally:
        # Make sure the file gets closed even if pickling fails
        out_file.close()
217
218 @staticmethod
220 import cPickle as pickle
221 file = open(filename, 'r')
222 data = file.read()
223 file.close()
224 try:
225 obj = pickle.loads(data)
226 except Exception, err:
227
228
229
230 raise ParseResults.LoadError, "could not read parse results "\
231 "from %s. %s: %s" % (filename, type(err).__name__, err)
232 return obj
233
236