jazzparser.data

1 """Data structures for the Jazz Parser. 2 3 Basic data types for the Jazz Parser. 4 5 """ 6 """ 7 ============================== License ======================================== 8 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 9 10 This file is part of The Jazz Parser. 11 12 The Jazz Parser is free software: you can redistribute it and/or modify 13 it under the terms of the GNU General Public License as published by 14 the Free Software Foundation, either version 3 of the License, or 15 (at your option) any later version. 16 17 The Jazz Parser is distributed in the hope that it will be useful, 18 but WITHOUT ANY WARRANTY; without even the implied warranty of 19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 GNU General Public License for more details. 21 22 You should have received a copy of the GNU General Public License 23 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>. 24 25 ============================ End license ====================================== 26 27 """ 28 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>" 29 30 import copy, re 31 import logging 32 33 from jazzparser.utils.base import filter_latex 34 from jazzparser.utils.chords import int_to_chord_numeral, chord_numeral_to_int, \ 35 int_to_ly_note, ChordError, int_to_pitch_class, \ 36 pitch_class_to_int 37 from jazzparser.data.db_mirrors import Chord as MirrorChord 38 39 # Get the logger from the logging system 40 logger = logging.getLogger("main_logger") 41 42 ROMAN_REGEX = re.compile(r""" 43 ^ # Start at the very beginning 44 (?P<root> 45 (?P<accidental>b|\#)? # Flat or sharp, or neither 46 (?P<numeral>I{1,3}|I?V|VI{0,2}|X|Y|Z) # Chord numeral: I-III or IV-V or V-VII; or variable: X, Y, Z 47 ) 48 (?P<type>m(7|,b5|,M7)?|aug(7|,M7)? 49 |o7|%7|sus4(,7)?|b5(,7|,M7)? 50 |M7|7|\#5,m7)? # All the chord types (triad+1) we allow 51 ($(?P<additions>6|9|b9|b10 52 |13|\+9|\+11)$)? # Extra additions must come after the chord type in brackets 53 $ # Match the whole string 54 """, re.VERBOSE) 55 56 PITCH_REGEX = re.compile(r""" 57 ^ 58 (?P<root> 59 (?P<pitch>[A-G]) # Chord numeral: I-III or IV-V or V-VII; or variable: X, Y, Z 60 (?P<accidental>b|\#)? # Flat or sharp, or neither 61 ) 62 (?P<type>m(7|,b5|,M7)?|aug(7|,M7)? 63 |o7|%7|sus4(,7)?|b5(,7|,M7)? 64 |M7|7|\#5,m7)? # All the chord types (triad+1) we allow 65 ($(?P<additions>6|9|b9|b10 66 |13|\+9|\+11)$)? # Extra additions must come after the chord type in brackets 67 $ 68 """, re.VERBOSE)

69 70 71 -class Chord(object):

72 """ 73 A Chord object represents a single chord in an input 74 sequence to the parser. 75 76 This was the original way of processing input to the parser and still 77 lurks around. However, a better representation is provided by 78 L{jazzparser.data.db_mirrors.Chord}, which is specifically designed 79 to replicate the data structure in the corpus database. 80 Instances of this class can be converted to a db-mirror chord using 81 L{to_db_mirror}. This class is now used mainly for processing textual input. 82 83 """ 84 # These mirror the table sequences_chordtype in the annotator db 85 TYPE_SYMBOLS = { 86 1 : "", 87 2 : "m", 88 3 : "M7", 89 4 : "o7", 90 5 : "%7", 91 6 : "aug", 92 7 : "m,b5", 93 8 : "b5", 94 9 : "m,M7", 95 10 : "7", 96 11 : "m7", 97 12 : "aug7", 98 13 : "b5,7", 99 14 : "sus4", 100 15 : "sus4,7", 101 16 : "aug,M7", 102 17 : "b5,M7", 103 18 : "#5,m7", 104 } 105 # The types that can function as 7 chords 106 SEVEN_TYPES = [ 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 18 ] 107 # Any chord with additions == one of these has an implied 7 in it 108 SEVEN_IMPLIED_ADDITIONS = [ "9", "b9", "b10", "13", "+9", "+11" ] 109 # Certain chord types should be mapping to other types to realise an 110 # implied 7 if one exists in the additions 111 IMPLIED_SEVEN_MAPPINGS = { 112 1 : 10, 113 2 : 11, 114 6 : 12, 115 7 : 5, 116 8 : 13, 117 14: 15 118 } 119

120 - def _get_root(self):

121 return self._root

122 - def _set_root(self, value):

123 if value is not None: 124 value = value % 12 125 self._root = value

126

127 - def _get_root_numeral(self):

128 if self.roman: 129 return int_to_chord_numeral(self._root) 130 else: 131 return int_to_pitch_class(self._root)

132 - def _set_root_numeral(self, value):

133 if self.roman: 134 self._root = chord_numeral_to_int(value) 135 else: 136 self._root = pitch_class_to_int(value)

137

138 - def _get_type(self):

139 return Chord.TYPE_SYMBOLS[self._type]

140 - def _set_type(self, value):

141 if value is None: 142 value = "" 143 if value not in Chord.TYPE_SYMBOLS.values(): 144 raise ValueError, "%s is not a valid chord type: must be one of [%s]" \ 145 % (value, 146 ",".join(Chord.TYPE_SYMBOLS.values())) 147 else: 148 self._type = [key for key,val in Chord.TYPE_SYMBOLS.items() if val==value][0]

149 150 151 root = property(_get_root, _set_root) 152 root_numeral = property(_get_root_numeral, _set_root_numeral) 153 type = property(_get_type, _set_type) 154

155 - def __init__(self, root_numeral="C", type="", additions=None, roman=False):

156 self.roman = roman 157 self.root_numeral = root_numeral 158 self.type = type 159 if additions is None: 160 additions = "" 161 self.additions = additions

162

163 - def __hash__(self):

164 if self.root is None: 165 return 13 166 return self.root

167

168 - def __ne__(self, other):

169 return not (self == other)

170

171 - def __eq__(self, other):

172 return self.equal_bar_additions(other) and \ 173 self.additions == other.additions

174

175 - def equal_bar_additions(self, other):

176 return (type(self) == type(other)) and \ 177 (self.root == other.root) and \ 178 (self.type == other.type)

179

180 - def _get_tetrad_type(self):

181 """ 182 Returns the implicit or explicit tetrad type of the chord. This 183 is the chord type, but mapped so that any 7 implicit in the 184 additions is explicit in the type. 185 """ 186 if self.additions in Chord.SEVEN_IMPLIED_ADDITIONS and self._type in Chord.IMPLIED_SEVEN_MAPPINGS: 187 # There is an implied seven. 188 # The seven is not explicit in the chord type, so the 189 # type should be mapped 190 return Chord.TYPE_SYMBOLS[Chord.IMPLIED_SEVEN_MAPPINGS[self._type]] 191 return self.type

192 tetrad_type = property(_get_tetrad_type) 193

194 - def __str__(self):

195 # Base name is the chord name, e.g. "C#" 196 # Follow this by the chord type, e.g. "m" in "C#m" 197 name = "%s%s" % (self.root_numeral, self.type) 198 # If there are any additions, add them on the end 199 if self.additions: 200 name += "(%s)" % self.additions 201 return name

202

203 - def __repr__(self):

204 return str(self)

205

206 - def to_latex(self):

207 return filter_latex(str(self))

208 209 @staticmethod

210 - def interval(chord1, chord2):

211 """ Returns the interval from chord1 to chord2. """ 212 return (chord2.root - chord1.root) % 12

213 214 @staticmethod

215 - def from_name(chord_name, roman=False, permissive=False):

216 """ 217 Given a string chord name, returns the Chord object corresponding 218 to that name. 219 220 C{permissive} allows the label to be interpreted as a roman numeral 221 or a pitch label. 222 223 """ 224 # Use a clever regular expression to split up the chord name 225 if permissive: 226 # Try both regexes 227 parse = ROMAN_REGEX.match(chord_name.strip()) 228 if parse is None: 229 parse = PITCH_REGEX.match(chord_name.strip()) 230 roman = False 231 else: 232 roman = True 233 elif roman: 234 parse = ROMAN_REGEX.match(chord_name.strip()) 235 else: 236 parse = PITCH_REGEX.match(chord_name.strip()) 237 238 if parse is None: 239 raise ChordError, "invalid chord symbol (%s mode): %s" % \ 240 (permissive and "either" or (roman and "roman" or "pitch"), 241 chord_name) 242 result = parse.groupdict() 243 root = result['root'] 244 chord_type_name = result['type'] 245 additions = result['additions'] 246 247 # Build the Chord object using these results 248 return Chord(root_numeral=root, type=chord_type_name, 249 additions=additions, roman=roman)

250

251 - def to_db_mirror(self):

252 """ 253 Produce a db mirror Chord instance (see L{jazzparser.data.db_mirrors} 254 that represents the same chord as this. 255 256 """ 257 mirror = MirrorChord(root=self.root, 258 type=self.type, 259 additions=self.additions) 260 return mirror

261

262 263 -class DerivationTrace(object):

264 """ 265 Stores a trace of the derivation of a particular CCGCategory node 266 and is associated with that category. For parse results, these structures 267 will often be so large that there's no hope of being able to print the 268 thing (even recursing to count the size can be prohibitively slow!). 269 270 """

271 - def __init__(self, result, rule=None, args=[], word=None):

272 """ 273 rule and args may be specified to give the derivation node an 274 initial pointer to a rule that was applied and a list of the 275 arguments for that rule. 276 Add more rules using add_rule(). 277 278 All rule applications stored should have resulted in result. 279 """ 280 # Store a list of the rule applications and their arguments 281 self.rules = [] 282 self.result = result 283 self.word = word 284 if rule is not None: 285 self.rules.append((rule,args))

286

287 - def add_rule(self, rule, args=[]):

288 """ 289 Add a rule application to the derivation node (that resulted in the 290 same category as other rule applications stored here). 291 The rule is a pointer to the rule object that was applied. 292 The args is a list of the arguments to which the rule was applied. 293 """ 294 self.rules.append((rule,args))

295

296 - def add_rules_from_trace(self, other_trace):

297 self.rules.extend(other_trace.rules)

298

299 - def __str__(self):

300 return self.str_indent("")

301

302 - def str_indent(self, indent="", signfmt=str):

303 output = "" 304 output += indent + signfmt(self.result) + "\n" 305 if self.word is not None: 306 output += "%s | \"%s\"\n" % (indent, self.word) 307 else: 308 for rule in self.rules: 309 (ruleobj, arglist) = rule 310 output += indent + " | from %s (%s) applied to\n" % \ 311 (ruleobj.readable_rule, ruleobj.name) 312 for arg in arglist: 313 output += arg.str_indent(indent+" | ", signfmt=signfmt) 314 315 return output

316

317 - def get_size(self):

318 if self.word is not None: 319 return 1 320 else: 321 size = 0 322 for rule in self.rules: 323 size += 1 324 for arg in rule[1]: 325 size += arg.get_size() 326 return size

327

328 329 -class Fraction(object):

330 """ 331 Stores a rational fraction as a numerator and denominator. 332 Fractions can be output as strings and also read in from strings 333 (use Fraction(string=<string>), or just Fraction(<string>)). 334 335 The format used is "i", where i 336 is an integer if the fraction is an exact integer, or "i n/d", where 337 i is the integer part, n the numerator and d the denominator. 338 339 """

340 - def __init__(self, numerator=0, denominator=1, string=None):

341 self._denominator = 1 342 343 self.numerator = numerator 344 self.denominator = denominator 345 if isinstance(numerator, str): 346 string = numerator 347 if string is not None: 348 whole_string = string 349 # Get rid of extra spaces 350 string = string.strip() 351 # Check for negation at the beginning: has to negate whole fraction 352 # If we don't split this off now, it only negates the integer part 353 if string.startswith("-"): 354 neg = True 355 string = string.lstrip("-").strip() 356 else: 357 neg = False 358 # Split the integer part from the fractional part 359 string_parts = string.split(" ") 360 try: 361 if len(string_parts) == 1: 362 # Try splitting into a/b 363 fract_parts = [p.strip() for p in string.split("/")] 364 if len(fract_parts) == 1: 365 # Just an integer 366 self.numerator = int(string) 367 self.denominator = 1 368 elif len(fract_parts) == 2: 369 # a/b 370 self.numerator = int(fract_parts[0]) 371 self.denominator = int(fract_parts[1]) 372 else: 373 raise Fraction.ValueError, "Too many slashes in "\ 374 "fraction '%s'." % whole_string 375 else: 376 integer = int(string_parts[0]) 377 fract_parts = string_parts[1].split("/") 378 if len(fract_parts) == 2: 379 numerator = int(fract_parts[0]) 380 denominator = int(fract_parts[1]) 381 else: 382 raise Fraction.ValueError, "Too many slashes in "\ 383 "fraction '%s'." % whole_string 384 self.numerator = numerator + integer * denominator 385 self.denominator = denominator 386 except ValueError: 387 raise Fraction.ValueError, "Error parsing fraction "\ 388 "string '%s'." % string 389 if neg: 390 # There was a - at the beginning, so negate the whole thing 391 self.numerator = -self.numerator

392

393 - def _get_denominator(self):

394 return self._denominator

395 - def _set_denominator(self, val):

396 if val == 0: 397 raise ZeroDivisionError, "tried to set a Fraction's denominator "\ 398 "to zero" 399 self._denominator = val

400 denominator = property(_get_denominator, _set_denominator) 401

402 - def simplify(self):

403 # Find the highest common factor of the num and denom 404 hcf = euclid(self.numerator, self.denominator) 405 if hcf != 0: 406 self.numerator /= hcf 407 self.denominator /= hcf

408

409 - def simplified(self):

410 """ 411 Returns a simplified version of this fraction without modifying 412 the instance. 413 414 """ 415 # Find the highest common factor of the num and denom 416 hcf = euclid(self.numerator, self.denominator) 417 if hcf == 0: 418 numerator = self.numerator 419 denominator = self.denominator 420 else: 421 numerator = self.numerator / hcf 422 denominator = self.denominator / hcf 423 return Fraction(numerator, denominator)

424

425 - def reciprocal(self):

426 if self.numerator == 0: 427 raise ZeroDivisionError, "tried to take reciprocal of 0" 428 return Fraction(self.denominator, self.numerator)

429

430 - def __str__(self):

431 if self.denominator == 1: 432 return "%d" % self.numerator 433 elif self.numerator/self.denominator == 0: 434 return "%d/%d" % (self.numerator, self.denominator) 435 else: 436 return "%d %d/%d" % (self.numerator/self.denominator, \ 437 self.numerator % self.denominator, \ 438 self.denominator)

439 440 __repr__ = __str__ 441

442 - def to_latex(self):

443 if self.denominator == 1: 444 return "%d" % self.numerator 445 else: 446 return "%d \\frac{%d}{%d}" % (self.numerator/self.denominator, \ 447 self.numerator % self.denominator, \ 448 self.denominator)

449 450 ####### Operator overloading ####### 451

452 - def __add__(self, other):

453 if type(other) == int or type(other) == long: 454 result = Fraction(self.numerator + other*self.denominator, self.denominator) 455 elif type(other) == Fraction: 456 new_denom = self.denominator * other.denominator 457 result = Fraction((self.numerator*other.denominator \ 458 + other.numerator*self.denominator), new_denom) 459 elif type(other) == float: 460 return float(self) + other 461 else: 462 raise TypeError, "unsupported operand type for - with Fraction: %s" % type(other) 463 result.simplify() 464 return result

465 466 # For backwards compatibility... 467 plus = __add__ 468

469 - def __radd__(self, other):

470 # Addition works the same both ways 471 return self + other

472

473 - def __neg__(self):

474 return Fraction(-self.numerator, self.denominator)

475

476 - def __sub__(self, other):

477 return self + (-other)

478

479 - def __rsub__(self, other):

480 return (-self) + other

481

482 - def __mul__(self, other):

483 if type(other) == int or type(other) == long: 484 result = Fraction(self.numerator*other, self.denominator) 485 elif type(other) == Fraction: 486 result = Fraction(self.numerator*other.numerator, self.denominator*other.denominator) 487 elif type(other) == float: 488 return float(self) * other 489 else: 490 raise TypeError, "unsupported operand type for * with Fraction: %s" % type(other) 491 result.simplify() 492 return result

493

494 - def __rmul__(self, other):

495 # Multiplication works the same both ways 496 return self * other

497

498 - def __div__(self, other):

499 if type(other) == int or type(other) == long: 500 result = Fraction(self.numerator, self.denominator*other) 501 elif type(other) == Fraction: 502 result = Fraction(self.numerator*other.denominator, self.denominator*other.numerator) 503 elif type(other) == float: 504 return float(self) / other 505 else: 506 raise TypeError, "unsupported operand type for / with Fraction: %s" % type(other) 507 result.simplify() 508 return result

509

510 - def __rdiv__(self, other):

511 return self.reciprocal() * other

512

513 - def __float__(self):

514 return float(self.numerator) / self.denominator

515

516 - def __int__(self):

517 return self.numerator / self.denominator

518

519 - def __long__(self):

520 return long(self.numerator) / long(self.denominator)

521 522 ##################################### 523

524 - def __cmp__(self, other):

525 if other is None: 526 return 1 527 528 if type(other) == int: 529 other = Fraction(other) 530 531 if type(other) == float: 532 return cmp(float(self), other) 533 534 if other.__class__ != self.__class__: 535 raise TypeError, "Fraction.__cmp__(self,other) requires other to "\ 536 "be of type Fraction or int. Type was %s." % other.__class__ 537 # Cmp should not have any lasting effect on the objects 538 selfnum = self.numerator 539 othernum = other.numerator 540 selfnum *= other.denominator 541 othernum *= self.denominator 542 if selfnum == othernum: 543 return 0 544 elif selfnum > othernum: 545 return 1 546 else: 547 return -1

548

549 - def __hash__(self):

550 return self.numerator + self.denominator

551

552 - class ValueError(Exception):

553 pass

554

555 -def euclid(num1, num2):

556 while num2 != 0: 557 remainder = num1 % num2 558 num1 = num2 559 num2 = remainder 560 return num1

561

562 563 -class HashSet(object):

564 """ 565 A simple implementation of a hash table using a dictionary. 566 The table is a set, since it does not store duplicate entries. 567 568 Stores pointers both in a hash table (dictionary) and a list, 569 so that the values can be retreived quickly. 570 571 """

572 - def __init__(self):

573 self.table = {} 574 self.list = []

575

576 - def _add_existing_value(self, existing_value, new_value):

577 """ 578 When a value already exists in the table, this is called 579 instead of adding the value. By default, it does nothing 580 (i.e. drops the new value), but you can override it if you 581 want to do something else to combine the values. 582 583 Note that your custom methods must only modify the sign 584 that's already in the set (C{existing_value}), not add a 585 new sign and not modify C{new_value}. 586 587 """ 588 pass

589

590 - def append(self, new_entry):

591 """ 592 Appends the new entry to the set if it's not already there. 593 Returns true if the entry is added, false otherwise. 594 """ 595 # Look up its hash value 596 key = hash(new_entry) 597 if key not in self.table: 598 # The hash doesn't already exist: add a new list 599 self.table[key] = [] 600 elif new_entry in self.table[key]: 601 # It's already there. Don't add it again. 602 index = self.table[key].index(new_entry) 603 existing_sign = self.table[key][index] 604 self._add_existing_value(existing_sign, new_entry) 605 return False 606 # Add the entry to the correct list if it's not there 607 self.table[key].append(new_entry) 608 # Also store a pointer in the list 609 self.list.append(new_entry) 610 return True

611

612 - def extend(self, entries):

613 """ 614 Appends each of the given entries to the set. 615 Returns true if any of them is added, false otherwise. 616 """ 617 added = False 618 for entry in entries: 619 app = self.append(entry) 620 if app: 621 added = True 622 return added

623

624 - def __contains__(self, value):

625 # Look up the hash value 626 key = hash(value) 627 # Check whether the value's in the table 628 return value in self.table[key]

629

630 - def values(self):

631 return self.list

632

633 - def remove(self, entry):

634 key = hash(entry) 635 if entry not in self.table[key]: 636 raise ValueError, "Tried to remove an entry that's not in the hash set." 637 # Remove from the hash table 638 self.table[key].remove(entry) 639 # Also remove from the list 640 self.list.remove(entry) 641 return

642

643 - def __len__(self):

644 return len(self.values())

645

Source Code for Package jazzparser.data