1 """Data structures that mirror the database models.
2
3 In order to have a data format that can be passed around without
4 needing a database set up to import the data into, we need mirrors
5 of the database models that are not themselves dependent on a database.
6 These classes exactly replicate the data structure of the models in
7 apps.sequences.models and can be created from those models
8 (see model.mirror).
9
10 The intention of this is not that this data can be imported back into
11 the database, but simply that all the database's chord sequence data
12 can be read in (from a pickled file) and output to other formats.
13 These classes just provide the unified database-independent data models.
14 It is these exported models that are used for training models, etc.
15
16 Use load_pickled_data to read in a file that's been created from the
17 database models.
18
19 """
20 """
21 ============================== License ========================================
22 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding
23
24 This file is part of The Jazz Parser.
25
26 The Jazz Parser is free software: you can redistribute it and/or modify
27 it under the terms of the GNU General Public License as published by
28 the Free Software Foundation, either version 3 of the License, or
29 (at your option) any later version.
30
31 The Jazz Parser is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 GNU General Public License for more details.
35
36 You should have received a copy of the GNU General Public License
37 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>.
38
39 ============================ End license ======================================
40
41 """
42 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"
43
44 from jazzparser.utils.chords import int_to_pitch_class
47 """
48 A single instance of a chord in a sequence. These are chained by
49 their C{next} attribute into a sequence.
50
51 """
52 - def __init__(self, root=None, type=None, additions=None, bass=None,
53 next=None, duration=None, category=None, sequence=None,
54 treeinfo=None):
64
65
66
69
71 return str(unicode(self))
72 jazz_parser_input = property(__str__)
73
76
78 if self._treeinfo is not None:
79 return self._treeinfo
80 else:
81
82 return TreeInfo()
84 if ti is not None:
85 ti.chord = self
86 self._treeinfo = ti
87 treeinfo = property(_get_treeinfo, _set_treeinfo)
88
90 """
91 Additional information about a chord that allows an unambiguous
92 derivation tree to be built.
93
94 Stored as a separate class because it's a separate table in the
95 database.
96
97 """
def __init__(self, coord_unresolved=False, coord_resolved=False):
    """
    Store the two flags that make up this record.

    Both arguments are kept exactly as given, under attributes of the
    same name; no validation or conversion is performed.

    @param coord_unresolved: value for C{self.coord_unresolved}
        (defaults to C{False}).
    @param coord_resolved: value for C{self.coord_resolved}
        (defaults to C{False}).

    """
    # NOTE(review): "coord" presumably abbreviates "coordination" in the
    # derivation tree -- confirm against the corresponding database model.
    self.coord_resolved = coord_resolved
    self.coord_unresolved = coord_unresolved
101
103 """
104 A chord sequence in the corpus.
105
106 Chords in the sequence are stored in a linked list structure
107 (implemented by L{Chord}). The start of the list is given by
108 C{first_chord}.
109
110 You can also iterate over a C{ChordSequence} instance, which will
111 iterate over its chords.
112
113 """
def __init__(self, name=None, key=None, bar_length=None, first_chord=None,
             notes=None, analysis_omitted=None, omissions=None,
             source=None, id=None):
    """
    Create a sequence record from plain values.

    Every argument is optional (defaulting to C{None}) and is stored
    unchanged on the instance under an attribute of the same name.
    No validation or conversion is performed here.

    @param name: stored as C{self.name}.
    @param key: stored as C{self.key}.
    @param bar_length: stored as C{self.bar_length}.
    @param first_chord: head of the linked list of chords that makes up
        the sequence; stored as C{self.first_chord}.
    @param notes: stored as C{self.notes}.
    @param analysis_omitted: stored as C{self.analysis_omitted}.
    @param omissions: stored as C{self.omissions}.
    @param source: stored as C{self.source}.
    @param id: stored as C{self.id}.

    """
    # Descriptive fields.
    self.name = name
    self.key = key
    self.bar_length = bar_length
    # Entry point into the chord chain.
    self.first_chord = first_chord
    self.notes = notes
    self.analysis_omitted = analysis_omitted
    self.omissions = omissions
    self.source = source
    # The parameter name shadows the builtin ``id``; it is kept because
    # callers may pass it by keyword.
    self.id = id
126
127
128
130 return unicode(self.name)
131
133 return unicode(self).encode('ascii','replace')
134 string_name = property(_get_string_name)
135
137 chord = self.first_chord
138 while chord is not None:
139 yield chord
140 chord = chord.next
141
144
146 total = 0
147 annotated = 0
148 for chord in self.iterator():
149 if chord.category is not None and chord.category != "":
150 annotated += 1
151 total += 1
152 return (annotated, total)
153 number_annotated = property(_get_number_annotated)
154
156 annotated, total = self.number_annotated
157 return 100.0 * float(annotated) / float(total)
158 percentage_annotated = property(_get_percentage_annotated)
159
161 """
162 True if every chord in the sequence is annotated. This should
163 usually be a bit quicker than checking percentage_annotated.
164 """
165 for chord in self.iterator():
166 if chord.category is None or chord.category == "":
167 return False
168 return True
169 fully_annotated = property(_get_fully_annotated)
170
173 length = property(_get_length)
174 __len__ = _get_length
175
176 @property
184
186 """
187 Data from the database can be converted into the form of the above
188 models and saved to a file by pickling (see apps.sequences.datautils).
189 Without any dependence on the database, we can then read in such a
190 file and access all the sequence data.
191 Returns a list of ChordSequence mirrors.
192 """
193 import os.path, pickle
194 filename = os.path.abspath(filename)
195 file = open(filename, 'r')
196
197 unpick = pickle.Unpickler(file)
198 data = unpick.load()
199 file.close()
200 return data
201
203 """
204 Given a list of ChordSequence mirror instances, saves them to a
205 file by pickling.
206 This can be done directly from the database using
207 apps.sequences.datautils.pickle_all_sequences.
208
209 """
210 import pickle, os.path
211 filename = os.path.abspath(filename)
212 file = open(filename, 'w')
213 pickler = pickle.Pickler(file)
214 pickler.dump(sequences)
215 file.close()
216
218 """
219 Stores indexes and provides quick access to mirrored sequences.
220 Since we are not accessing the database directly, this provides
221 a reasonably efficient alternative to doing a linear search through
222 sequences every time we need one.
223
224 Iterating over a C{SequenceIndex} instance will iterate over its
225 sequences in order of id.
226
227 """
228 @staticmethod
231
235
237 return list(sorted(self._sequences, key=lambda s:s.id))
238 sequences = property(_get_sequences)
239
241 self._by_id = dict([(seq.id,seq) for seq in self.sequences])
242
244 if id in self._by_id:
245 return self._by_id[id]
246 else:
247 return None
248
250 return list(sorted(self._by_id.keys()))
251 ids = property(_get_ids)
252
254 return len(self._sequences)
255
261
263 if index >= len(self):
264 return None
265 else:
266 return list(sorted(self.ids))[index]
267
269 """
270 Get the index in the sequence file of the sequence with the
271 given id. Returns None if the id isn't in the sequence file.
272
273 """
274 if id not in self.ids:
275 return None
276 else:
277 return list(sorted(self.ids)).index(id)
278
281