1 """Corpus file handling for David Temperley's corpora.
2
3 David Temperley has various corpora that he has used in his books:
4 - The Cognition of Basic Musical Structures (2001)
5 - Music and Probability (2007)
6
7 He evaluates his own techniques and others' on this data, so it is
8 an important comparison for me. It's also an important source of
9 annotated data, aside from my own small corpus.
10
11 This module provides utilities for reading in the corpus files and
12 representing the data internally.
13
14 The data formats are described in Temperley's documentation for the
15 programs that make up Melisma.
16
17 @note: This implementation is not intended to be complete. It may not handle
18 all types of input that Temperley describes. I'm just implementing
19 things as I need them.
20
21 """
22 """
23 ============================== License ========================================
24 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding
25
26 This file is part of The Jazz Parser.
27
28 The Jazz Parser is free software: you can redistribute it and/or modify
29 it under the terms of the GNU General Public License as published by
30 the Free Software Foundation, either version 3 of the License, or
31 (at your option) any later version.
32
33 The Jazz Parser is distributed in the hope that it will be useful,
34 but WITHOUT ANY WARRANTY; without even the implied warranty of
35 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36 GNU General Public License for more details.
37
38 You should have received a copy of the GNU General Public License
39 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>.
40
41 ============================ End license ======================================
42
43 """
44 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"
45
46 import os
47 from jazzparser.data.corpora import get_corpus_file
48
49 _EVENTS = {}
52 """
53 Superclass of all events that occur in the data.
54
55 """
56 EVENT_NAME = None
57 """The identifier that begins the line of an input file for this event."""
58
59 @staticmethod
61 """
62 Creates an instance of the appropriate event type given a line
63 of an input file.
64
65 All subclasses should define a C{from_line} that creates an
66 instance of them given the list of string arguments from a line
67 of an input file.
68
69 """
70 name = line.split()[0]
71
72 if name not in _EVENTS:
73 raise InputError, "unknown event type %s" % name
74 return _EVENTS[name].from_line(line.split()[1:])
75
77 """
78 Generates a string representation suitable for writing out to
79 a line of a file.
80
81 """
82 raise NotImplementedError, "%s does not implement to_line()" % type(self).__name__
83
86
100
103
109
111 """
112 Tonal pitch-class note, found in TPC files and chord files.
113 A tonal pitch-class is a note as notated - i.e. it distinguishes
114 all differently named notes. They are represented as a position on
115 the line of fifths:
116
117 ...-2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 ...
118 ... Ab Eb Bb F C G D A E B F# C# G# D# A#...
119
120 They are also redundantly represented as a midi note value.
121
122 """
123 EVENT_NAME = "TPCNote"
124
125 - def __init__(self, start, end, note, pitch_class):
126 self.start = start
127 self.end = end
128 self.note = note
129 self.pitch_class = pitch_class
130
131 @staticmethod
133 debug_line = " ".join(line)
134 if len(line) != 4:
135 raise InputError, "TPCNote event needs 4 arguments. %d "\
136 "found: %s" % (len(line), debug_line)
137 return TPCNoteEvent(int(line[0]), int(line[1]), int(line[2]), int(line[3]))
138
140 return "%s %d %d %d %d" % (self.EVENT_NAME, self.start,
141 self.end, self.note, self.pitch_class)
142
145
147 """
148 A beat identifier, giving a time of occurrence and a beat level.
149
150 """
151 EVENT_NAME = "Beat"
152
154 self.time = time
155 self.level = level
156
157 @staticmethod
159 debug_line = " ".join(line)
160 if len(line) != 2:
161 raise InputError, "Beat event needs 2 arguments. %d "\
162 "found: %s" % (len(line), debug_line)
163 return BeatEvent(int(line[0]), int(line[1]))
164
168
171
173 """
174 A chord, identified just by its start time, end time and root. The
175 root is a position on the line of fifths.
176
177 @see: L{TPCNoteEvent}
178
179 """
180 EVENT_NAME = "Chord"
181
183 self.start = start
184 self.end = end
185 self.root = root
186
187 @staticmethod
189 debug_line = " ".join(line)
190 if len(line) != 3:
191 raise InputError, "Chord event needs 3 arguments. %d "\
192 "found: %s" % (len(line), debug_line)
193 return ChordEvent(int(line[0]), int(line[1]), int(line[2]))
194
196 return "%s %d %d %d" % (self.EVENT_NAME, self.start,
197 self.end, self.root)
198
201
203 """
204 Data structure to store a list of events read in from a corpus
205 file.
206
207 """
209 self._events = []
210 self._event_types = {}
211 self._ordered_types = []
212
219
221 return list(sorted(self._events))
222 events = property(__get_events)
223
225 """
226 Returns a list of all events of a specific type.
227
228 """
229 if type in self._event_types:
230 return list(sorted(self._event_types[type]))
231 else:
232 return []
233
235 """
236 Returns a list of all events, grouped by their event type,
237 with the event types in the order they were found in the input.
238
239 This should provide a form suitable for outputting to the lines
240 of a file (using C{to_line()} on each event). Alternatively,
241 just use C{to_lines()}.
242
243 """
244 return sum([self.get_events_by_type(typ) for typ in self._ordered_types], [])
245
247 """
248 Returns a list of lines suitable for outputting to a file.
249
250 """
251 return [ev.to_line() for ev in self.get_grouped_events()]
252
253 @staticmethod
255 """
256 Creates a new DataSequence to represent the data in a file.
257
258 @type infile: str or open file object
259 @param infile: filename or file object. Filename may be the
260 path to the file or the path within the corpus.
261
262 """
263 if type(infile) == str:
264 if not os.path.exists(infile):
265
266 infile = get_corpus_file('kp', infile)
267 infile = open(infile, 'r')
268
269 lines = infile.readlines()
270 dseq = DataSequence()
271 for line in lines:
272 dseq.add_event(Event.from_line(line))
273 return dseq
274
276 evs = self.events
277 if len(evs) > 10:
278 evs = evs[:9] + ["...", evs[-1]]
279 return "<Temperley: %s>" % ", ".join([str(ev) for ev in evs])
280
283
285 """
286 Constructs a L{MIDI EventStream<midi.EventStream>} from the
287 data in this stream.
288 This can then be output to a file to be played.
289
290 Note that TPCNotes will be output as normal MIDI notes. We
291 can't do any of the clever tuning stuff that we can do
292 with tonal space coordinates, since we'd need to do a further
293 step of analysis to work out the fully specified TS point from
294 the pitch class.
295
296 """
297 tempo = 120
298
299 from midi import EventStream, NoteOffEvent, NoteOnEvent, SetTempoEvent
300 mid = EventStream()
301 mid.add_track()
302
303 temp = SetTempoEvent()
304 temp.tempo = tempo
305 temp.tick = 0
306 mid.add_event(temp)
307
308 ticks_per_ms = float(mid.resolution) * tempo / 60000
309
310 for ev in self.events:
311 if isinstance(ev, TPCNoteEvent):
312
313 note = ev.note
314
315 noteon = NoteOnEvent()
316 noteon.pitch = note
317 noteon.tick = int(ev.start * ticks_per_ms)
318 noteon.velocity = 100
319 mid.add_event(noteon)
320
321 noteoff = NoteOffEvent()
322 noteoff.pitch = note
323 noteoff.tick = int(ev.end * ticks_per_ms)
324 noteoff.velocity = 100
325 mid.add_event(noteoff)
326 elif isinstance(ev, (ChordEvent,BeatEvent)):
327
328 continue
329 else:
330 raise TypeError, "event type %s not recognised by "\
331 "MIDI converter." % type(ev).__name__
332 return mid
333
334 read_file = DataSequence.from_file
338