jazzparser.data.db

1 """Data structures that mirror the database models. 2 3 In order to have a data format that can be passed around without 4 needing a database set up to import the data into, we need mirrors 5 of the database models that are not themselves dependent on a database. 6 These classes exactly replicate the data structure of the models in 7 apps.sequences.models and can be created from those models 8 (see model.mirror). 9 10 The intention of this is not that this data can be imported back into 11 the database, but simply that all the database's chord sequence data 12 can be read in (from a pickled file) and output to other formats. 13 These classes just provide the unified database-independent data models. 14 It is these exported models that are used for training models, etc. 15 16 Use load_pickled_data to read in a file that's been created from the 17 database models. 18 19 """ 20 """ 21 ============================== License ======================================== 22 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 23 24 This file is part of The Jazz Parser. 25 26 The Jazz Parser is free software: you can redistribute it and/or modify 27 it under the terms of the GNU General Public License as published by 28 the Free Software Foundation, either version 3 of the License, or 29 (at your option) any later version. 30 31 The Jazz Parser is distributed in the hope that it will be useful, 32 but WITHOUT ANY WARRANTY; without even the implied warranty of 33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 34 GNU General Public License for more details. 35 36 You should have received a copy of the GNU General Public License 37 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>. 38 39 ============================ End license ====================================== 40 41 """ 42 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>" 43 44 from jazzparser.utils.chords import int_to_pitch_class

class Chord(object):
    """
    One occurrence of a chord within a chord sequence.

    Instances form a singly linked list: each chord refers to the one
    that follows it through its C{next} attribute.

    """
    def __init__(self, root=None, type=None, additions=None, bass=None,
                 next=None, duration=None, category=None, sequence=None,
                 treeinfo=None):
        """
        Store the given field values on the new chord. Every field
        defaults to C{None}.

        """
        # Description of the chord itself
        self.root = root                # Int
        self.type = type                # Stored as a string
        self.additions = additions      # String
        self.bass = bass                # Int
        self.duration = duration        # Int
        self.category = category        # String
        # Links to the surrounding structure
        self.next = next                # Another Chord mirror
        self.sequence = sequence        # Mirror of the sequence model
        # Goes through the treeinfo property, so a non-None value also
        # receives a back-reference to this chord
        self.treeinfo = treeinfo

    # The methods below are the same as those of the database model

    def __unicode__(self):
        """
        Unicode label made of C{int_to_pitch_class(root)} followed by
        the chord type.

        """
        label = '%s%s' % (int_to_pitch_class(self.root), self.type)
        return unicode(label)

    def __str__(self):
        """Byte-string version of the unicode label."""
        as_unicode = unicode(self)
        return str(as_unicode)
    # The same string, exposed as a read-only attribute
    jazz_parser_input = property(__str__)

    def __repr__(self):
        """Same text as C{str(chord)}."""
        return str(self)

    def _get_treeinfo(self):
        """
        Return the stored tree info, or a new default L{TreeInfo} when
        none has been stored. The default is not kept on the chord.

        """
        stored = self._treeinfo
        if stored is None:
            # Nothing attached: hand back the default info
            return TreeInfo()
        return stored

    def _set_treeinfo(self, ti):
        """
        Store C{ti} as this chord's tree info. A value other than
        C{None} first gets its C{chord} attribute pointed at this chord.

        """
        if ti is not None:
            ti.chord = self
        self._treeinfo = ti
    treeinfo = property(_get_treeinfo, _set_treeinfo)

88

class TreeInfo(object):
    """
    Additional information about a chord that allows an unambiguous
    derivation tree to be built.

    Stored as a separate class because it's a separate table in the
    database.

    """
    def __init__(self, coord_unresolved=False, coord_resolved=False):
        """
        @param coord_unresolved: flag stored as given on the instance
            (default C{False})
        @param coord_resolved: flag stored as given on the instance
            (default C{False})

        """
        self.coord_resolved = coord_resolved
        self.coord_unresolved = coord_unresolved
        # Back-reference to the owning chord. It is overwritten when the
        # info is assigned to a chord's treeinfo attribute; starting it
        # at None means an unattached (e.g. default) info still has the
        # attribute instead of raising AttributeError on access.
        self.chord = None

101

class ChordSequence(object):
    """
    A chord sequence in the corpus.

    Chords in the sequence are stored in a linked list structure
    (implemented by L{Chord}). The start of the list is given by
    C{first_chord}.

    You can also iterate over a C{ChordSequence} instance, which will
    iterate over its chords.

    """
    def __init__(self, name=None, key=None, bar_length=None, first_chord=None,
                 notes=None, analysis_omitted=None, omissions=None,
                 source=None, id=None):
        """
        Store the given field values on the new sequence. Every field
        defaults to C{None}.

        """
        self.name = name                            # String
        self.key = key                              # String
        self.bar_length = bar_length                # Int
        self.first_chord = first_chord              # Mirror of a Chord
        self.notes = notes                          # String
        self.analysis_omitted = analysis_omitted    # Bool
        self.omissions = omissions                  # String
        self.source = source                        # Store as a string
        self.id = id

    # The following methods are identical to the model's

    def __unicode__(self):
        """The sequence's name as a unicode string."""
        return unicode(self.name)

    def _get_string_name(self):
        """
        The name encoded as ASCII; characters that cannot be encoded
        are replaced rather than raising an error.

        """
        return unicode(self).encode('ascii', 'replace')
    string_name = property(_get_string_name)

    def iterator(self):
        """
        Generate the chords of the sequence in order, following each
        chord's C{next} link from C{first_chord} until it is C{None}.

        """
        chord = self.first_chord
        while chord is not None:
            yield chord
            chord = chord.next

    def __iter__(self):
        return self.iterator()

    def _get_number_annotated(self):
        """
        Count the chords whose category is neither C{None} nor empty.

        @return: pair C{(annotated, total)}

        """
        total = 0
        annotated = 0
        for chord in self.iterator():
            if chord.category is not None and chord.category != "":
                annotated += 1
            total += 1
        return (annotated, total)
    number_annotated = property(_get_number_annotated)

    def _get_percentage_annotated(self):
        """
        Percentage (0.0 to 100.0) of the chords that are annotated.

        A sequence with no chords reports 100.0 instead of raising
        ZeroDivisionError, so that this stays in agreement with
        C{fully_annotated}, which is C{True} for an empty sequence.

        """
        annotated, total = self.number_annotated
        if total == 0:
            # No chords at all: nothing is left unannotated
            return 100.0
        return 100.0 * float(annotated) / float(total)
    percentage_annotated = property(_get_percentage_annotated)

    def _get_fully_annotated(self):
        """
        True if every chord in the sequence is annotated. This should
        usually be a bit quicker than checking percentage_annotated,
        since it stops at the first unannotated chord.

        """
        for chord in self.iterator():
            if chord.category is None or chord.category == "":
                return False
        return True
    fully_annotated = property(_get_fully_annotated)

    def _get_length(self):
        """
        Number of chords in the sequence. The linked list is walked on
        every call, without building an intermediate list.

        """
        return sum(1 for _ in self.iterator())
    length = property(_get_length)
    __len__ = _get_length

    @property
    def time_map(self):
        """
        Dictionary mapping each chord's start time to the chord. The
        first chord starts at 0 and every later chord starts at the sum
        of the durations of the chords before it. If several chords
        start at the same time, the last of them is the one kept.

        """
        time = 0
        time_map = {}
        for chord in self.iterator():
            time_map[time] = chord
            time += chord.duration
        return time_map

184

def load_pickled_data(filename):
    """
    Data from the database can be converted into the form of the above
    models and saved to a file by pickling (see apps.sequences.datautils).
    Without any dependence on the database, we can then read in such a
    file and access all the sequence data.

    @param filename: path of the pickled file; it is made absolute
        before being opened
    @return: whatever object was pickled into the file, expected to be
        a list of ChordSequence mirrors

    """
    import os.path
    import pickle
    filename = os.path.abspath(filename)
    # NOTE: unpickling can execute arbitrary code, so this must only be
    # used on trusted files.
    # Pickle data is a byte stream, so the file is opened in binary
    # mode; the with-block closes it even if unpickling fails.
    with open(filename, 'rb') as infile:
        return pickle.Unpickler(infile).load()

201

def save_sequences(filename, sequences):
    """
    Given a list of ChordSequence mirror instances, saves them to a
    file by pickling.
    This can be done directly from the database using
    apps.sequences.datautils.pickle_all_sequences.

    @param filename: path of the file to write; it is made absolute
        before being opened and any existing content is overwritten
    @param sequences: the object to pickle into the file

    """
    import os.path
    import pickle
    filename = os.path.abspath(filename)
    # Pickle data is a byte stream, so the file is opened in binary
    # mode; the with-block closes it even if pickling fails.
    with open(filename, 'wb') as outfile:
        pickle.Pickler(outfile).dump(sequences)

216

class SequenceIndex(object):
    """
    Stores indexes and provides quick access to mirrored sequences.
    Since we are not accessing the database directly, this provides
    a reasonably efficient alternative to doing a linear search through
    the sequences every time we need one.

    Iterating over a C{SequenceIndex} instance will iterate over its
    sequences in order of id.

    """
    @staticmethod
    def from_file(filename):
        """Build an index from the sequences pickled in C{filename}."""
        return SequenceIndex(load_pickled_data(filename))

    def __init__(self, sequences):
        """
        @param sequences: the sequences to index; each one must have
            an C{id} attribute

        """
        self._sequences = sequences
        self.prepare_indices()

    def _get_sequences(self):
        """A new list of the sequences, sorted by id."""
        return sorted(self._sequences, key=lambda s: s.id)
    sequences = property(_get_sequences)

    def prepare_indices(self):
        """
        (Re)build the id lookup table. If several sequences share an
        id, the one that comes last in the sorted order is kept.

        """
        self._by_id = dict((seq.id, seq) for seq in self.sequences)

    def sequence_by_id(self, id):
        """Return the sequence with the given id, or None if unknown."""
        return self._by_id.get(id)

    def _get_ids(self):
        """A new sorted list of the ids that are in the index."""
        return sorted(self._by_id)
    ids = property(_get_ids)

    def __len__(self):
        return len(self._sequences)

    def sequence_by_index(self, index):
        """
        Return the sequence at the given position in id order, or None
        if there is no such position.

        """
        id = self.id_for_index(index)
        if id is None:
            return None
        return self.sequence_by_id(id)

    def id_for_index(self, index):
        """
        Return the id found at the given position of the sorted id
        list, or None if the position is out of range.

        The range is that of the id list itself, which may be shorter
        than the sequence list if ids are repeated. Negative positions
        count from the end, as for lists.

        """
        try:
            return self.ids[index]
        except IndexError:
            return None

    def index_for_id(self, id):
        """
        Get the index in the sequence file of the sequence with the
        given id. Returns None if the id isn't in the sequence file.

        """
        if id not in self._by_id:
            return None
        return self.ids.index(id)

    def __iter__(self):
        return iter(self.sequences)

281

Source Code for Package jazzparser.data.db_mirrors