Package jazzparser :: Package formalisms :: Package base :: Module syntax
[hide private]
[frames] | [no frames]

Source Code for Module jazzparser.formalisms.base.syntax

  1  """Generic base CCG syntax classes. 
  2   
  3  Defines the internal representation of CCG categories for the Jazz Parser. 
  4  These should be subclassed in specific formalisms. 
  5  Everything in these base classes should be behaviour common to all  
  6  CCG formalisms and defines the core CCG syntactic functionality. 
  7   
  8  """ 
  9  """ 
 10  ============================== License ======================================== 
 11   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
 12    
 13   This file is part of The Jazz Parser. 
 14    
 15   The Jazz Parser is free software: you can redistribute it and/or modify 
 16   it under the terms of the GNU General Public License as published by 
 17   the Free Software Foundation, either version 3 of the License, or 
 18   (at your option) any later version. 
 19    
 20   The Jazz Parser is distributed in the hope that it will be useful, 
 21   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 23   GNU General Public License for more details. 
 24    
 25   You should have received a copy of the GNU General Public License 
 26   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
 27   
 28  ============================ End license ====================================== 
 29   
 30  """ 
 31  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"  
 32   
 33  from jazzparser.utils.base import filter_latex 
 34  from jazzparser.utils.domxml import remove_unwanted_elements 
 35  from jazzparser.data.assignments import EquivalenceAssignment 
 36  from jazzparser.utils.chords import ChordError, chord_numeral_to_int, int_to_chord_numeral 
 37  from jazzparser.grammar import GrammarReadError 
 38  import logging, copy, re 
 39   
 40  # Get the logger from the logging system 
 41  logger = logging.getLogger("main_logger") 
 42   
class SlashBase(object):
    """
    Base class for the slash of a complex CCG category.

    A slash stores its direction (C{forward}), the formalism it belongs
    to and an integer id. Subclasses may customise behaviour through
    three optional attributes, each of which is only consulted if it is
    present on the instance:

     - C{_pre_string}: text put before the slash symbol by C{str()}
     - C{_post_string}: text put after the slash symbol by C{str()}
     - C{_extra_eq}: a callable taking the other slash, used as an
       additional equality condition

    """
    def __init__(self, formalism, forward, id=0):
        """
        @param formalism: the formalism this slash belongs to.
        @param forward: true for a forward slash, false for a backward one.
        @param id: identifier stored on the slash (default 0).

        """
        self.formalism = formalism
        self.forward = forward
        self.id = id

    def __repr__(self):
        return str(self)

    def __str__(self):
        symbol = "/" if self.forward else "\\"
        if hasattr(self, '_post_string'):
            symbol += self._post_string
        if hasattr(self, '_pre_string'):
            symbol = self._pre_string + symbol
        return symbol

    def to_latex(self):
        """
        @return: Latex source for the slash, passed through filter_latex.
        """
        latex_source = "/" if self.forward else "\\backslash "
        return filter_latex(latex_source)

    def __hash__(self):
        # Only the direction contributes to the hash
        return 1 if self.forward else 0

    def __eq__(self, other):
        # The class is checked first, so that the direction is only read
        # from objects of the same class
        if not (self.__class__ == other.__class__):
            return False
        if not (self.forward == other.forward):
            return False
        # Optional additional condition supplied by a subclass
        if hasattr(self, '_extra_eq') and not self._extra_eq(other):
            return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def copy(self):
        """
        @return: a new SlashBase with the same formalism, direction and id.
        """
        return SlashBase(self.formalism, self.forward, self.id)
89
class SignBase(object):
    """
    A CCG category and its associated semantics: a CCG sign.

    Keeps a note of which rules have been applied and which other
    signs they were applied to, so that the parser can avoid re-applying
    the same rule to the same inputs again.

    """
    def __init__(self, formalism, category, semantics, derivation_trace=None):
        """
        @type formalism: L{FormalismBase subclass<FormalismBase>}
        @param formalism: the formalism of the subclass.
        @type category: L{Category}
        @param category: the top level node of the category instance.
        @type semantics: L{Semantics<semantics.lambdacalc.Semantics>}
        @param semantics: the semantics part of the sign
        @type derivation_trace: L{DerivationTrace<jazzparser.data.DerivationTrace>}
        @param derivation_trace: a derivation trace to store how the
            sign was derived (optional).

        """
        self.formalism = formalism
        self.category = category
        self.semantics = semantics
        self.unary_rules_applied = False
        self.derivation_trace = derivation_trace
        # This is not used until results are being processed. We give it
        # a default value so it will be clear if the value hasn't been stored.
        self.result_index = -1
        # Note which rules have been applied:
        #  unary rules are kept in a list,
        #  binary rules map to a list of id()s of the second input
        self._unary_applied = []
        self._binary_applied = {}

    def __hash__(self):
        return hash(self.category)

    def copy(self):
        """
        Builds a new sign from copies of the category and semantics and
        a shallow copy of the derivation trace. The record of applied
        rules is not carried over to the copy.

        @return: the new L{SignBase}.

        """
        # The formalism must be passed as the first argument: without it
        # every other argument ends up in the wrong constructor slot.
        return SignBase(self.formalism,
                        self.category.copy(),
                        self.semantics.copy(),
                        copy.copy(self.derivation_trace))

    def __str__(self):
        return "%s:%s" % (self.category, self.semantics)

    def __eq__(self, other):
        # Semantics need only be alpha-equivalent
        return (self.__class__ == other.__class__) and \
               (self.category == other.category) and \
               (self.semantics.alpha_equivalent(other.semantics))

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return str(self)

    def to_latex(self):
        """
        @return: a Latex source representation of the object.
        """
        return "$%s:%s$" % (self.category.to_latex(),
                            self.semantics.to_latex())

    def format_result(self):
        """
        Format the sign as a string for display in a list of results.
        By default, this just uses the class' str(), but subclasses
        may provide an alternative format if required (you may want
        signs to look different in results and in, e.g., chart
        representation).

        """
        return str(self)

    def format_latex_result(self):
        """
        Same as L{format_result}, but returns latex source. By default
        uses the class' L{to_latex} method.

        @see: L{format_result}

        """
        return self.to_latex()

    def apply_lexical_features(self, features):
        """
        Given a dictionary of features, applies any changes to this
        sign that need to be made before it is added to the chart on
        the basis of surface-level features (e.g. pitch, time).

        The base implementation does nothing.

        """
        return

    def set_time(self, time):
        """
        Must be implemented by subclasses. Adds timing information
        to components such that the start time of the whole sign
        is encoded as the given time.

        @raise NotImplementedError: always, in this base class.

        """
        raise NotImplementedError("set_time must be implemented by Sign subclasses.")

    def set_duration(self, duration):
        """
        Must be implemented by subclasses. Adds duration information
        to all components that store durations.

        @raise NotImplementedError: always, in this base class.

        """
        raise NotImplementedError("set_duration must be implemented by Sign subclasses.")

    def check_rule_applied(self, rule, other_input=None):
        """
        Returns True if the given rule instance has been applied to
        this sign previously in the parse. If the rule is binary,
        other_input should be given and this sign is assumed to be
        the leftmore input.

        @raise ValueError: if the rule's arity is not 1 and no
            other_input is given.

        """
        if rule.arity == 1:
            return rule in self._unary_applied
        # Binary rule
        if other_input is None:
            raise ValueError("tried to check whether a binary rule "
                             "has been applied, but didn't give a second input")
        # Second inputs are recorded by identity (id()), not by equality
        return rule in self._binary_applied and \
               id(other_input) in self._binary_applied[rule]

    def note_rule_applied(self, rule, other_input=None):
        """
        Keeps a note that the given rule was applied to this sign. If
        it is a binary rule, you must also specify what the second
        input was.

        @raise ValueError: if the rule's arity is not 1 and no
            other_input is given.

        """
        if rule.arity == 1:
            self._unary_applied.append(rule)
        else:
            if other_input is None:
                raise ValueError("tried to note that a binary rule "
                                 "has been applied, but didn't give a second input")
            self._binary_applied.setdefault(rule, []).append(id(other_input))
231 232
class Category(object):
    """
    Common parent of all category classes (i.e. functional and atomic).

    Stores the formalism and supplies C{!=} and C{repr()} in terms of
    C{==} and C{str()}, which subclasses are expected to define.

    """
    class CategoryParseError(Exception):
        pass

    def __init__(self, formalism):
        """
        @param formalism: the formalism this category belongs to.
        """
        self.formalism = formalism

    def __repr__(self):
        # The printable form doubles as the representation
        return str(self)

    def __ne__(self, other):
        # Inequality is always the negation of whatever == gives
        are_equal = (self == other)
        return not are_equal
248 249
class ComplexCategoryBase(Category):
    """
    A slash (functional) category: a result category, a slash and an
    argument category.

    """
    def __init__(self, formalism, result, slash, argument):
        """A slash category must be initialised with
        a pair of categories (argument and result) that
        appear on the right and left of the slash (respectively)
        and a Slash object.

        """
        super(ComplexCategoryBase, self).__init__(formalism)
        self.result = result
        self.slash = slash
        self.argument = argument

    def __hash__(self):
        # Sum of the hashes of the three components
        return hash(self.result) + hash(self.argument) + hash(self.slash)

    def __str__(self):
        pieces = [
            "(",
            str(self.result),
            str(self.slash),
            str(self.argument),
            ")",
        ]
        return "".join(pieces)

    def to_latex(self):
        """
        @return: Latex source built from the Latex of the result, slash
            and argument, wrapped in brackets.
        """
        pieces = [
            "(",
            self.result.to_latex(),
            self.slash.to_latex(),
            self.argument.to_latex(),
            ")",
        ]
        return "".join(pieces)

    def __eq__(self, other):
        # Components are only compared between objects of the same class
        if not (other.__class__ == self.__class__):
            return False
        return (other.result == self.result) and \
               (other.argument == self.argument) and \
               (other.slash == self.slash)

    def copy(self):
        """
        @return: a new ComplexCategoryBase built from copies of the
            result, slash and argument.
        """
        result_copy = self.result.copy()
        slash_copy = self.slash.copy()
        argument_copy = self.argument.copy()
        return ComplexCategoryBase(self.formalism,
                                   result_copy,
                                   slash_copy,
                                   argument_copy)

    def _get_slash_ids(self):
        """
        Get a set of the ids on the slashes in this category.
        """
        nested_ids = self.argument.slash_ids | self.result.slash_ids
        return nested_ids | set([self.slash.id])
    slash_ids = property(_get_slash_ids)

    def replace_slash_id(self, old_id, new_id):
        """
        Replace old_id with new_id on this category's own slash and,
        recursively, in the argument and the result.

        """
        if self.slash.id == old_id:
            self.slash.id = new_id
        self.argument.replace_slash_id(old_id, new_id)
        self.result.replace_slash_id(old_id, new_id)
306
class AtomicCategoryBase(Category):
    """
    Much of the implementation of an atomic category is left to
    subclasses, since this is where the most formalism-dependence is.
    """
    def __init__(self, formalism):
        # NOTE(review): the constructor body was not visible in the
        # listing this was recovered from; delegating to the parent
        # constructor (which stores the formalism) is assumed, since
        # copy() reads self.formalism -- confirm against the original.
        super(AtomicCategoryBase, self).__init__(formalism)

    def __hash__(self):
        # All base atomic categories share the same hash value
        return 0

    def __str__(self):
        return "<?>"

    def to_latex(self):
        """
        @return: Latex source for the placeholder category.
        """
        # Raw string: in a normal string literal "\t" is a tab
        # character, which would corrupt the \textbf command.
        return r"\textbf{?}"

    def copy(self):
        """
        @return: a new AtomicCategoryBase with the same formalism.
        """
        return AtomicCategoryBase(self.formalism)

    def _get_slash_ids(self):
        # An atomic category contains no slashes
        return set()
    slash_ids = property(_get_slash_ids)

    def replace_slash_id(self, old_id, new_id):
        # Nothing to replace: there are no slashes here
        pass
333
class DummyCategoryBase(Category):
    """
    A category type with no combinatorial power at all. This should never be
    used in derivations, but supplies something to put in the category part
    of a sign that has only a semantics (e.g. one that comes from a backoff
    model).

    """
    def __init__(self, formalism):
        # NOTE(review): the constructor body was not visible in the
        # listing this was recovered from; delegating to the parent
        # constructor (which stores the formalism) is assumed --
        # confirm against the original.
        super(DummyCategoryBase, self).__init__(formalism)

    def __hash__(self):
        # The value must actually be returned: a __hash__ that returns
        # None makes hash() raise a TypeError.
        return -1

    def __str__(self):
        return "DUMMY"

    def to_latex(self):
        """
        @return: Latex source for the dummy category.
        """
        # Raw string so the backslash never depends on "\e" being an
        # unrecognised escape sequence; the value is unchanged.
        return r"$\epsilon$"

    def copy(self):
        """
        @return: a new instance of the same class with the same formalism.
        """
        # The constructor requires the formalism argument
        return type(self)(self.formalism)

    # A dummy category contains no slashes. This is a class-level set
    # shared by all instances, so it should not be modified in place.
    slash_ids = set()

    def replace_slash_id(self, old, new):
        # Nothing to replace: there are no slashes here
        pass
360
class VariableSubstitutor(object):
    """
    An instance of VariableSubstitutor defines a type of variable forming
    a component of categories. The instance defines how this variable type
    is accessed and set, given a category and a key.

    The three callables are kept in the C{methods} dictionary under the
    keys C{'set'}, C{'replace'} and C{'canonical'}.

    """
    class InconsistencyError(Exception):
        pass

    class SubstitutionError(Exception):
        pass

    def __init__(self, name, value_setter, key_replacer, canonical_key=min):
        """
        @param name: name of the variable type.
        @param value_setter: callable (category, key, value) used by
            L{substitute}.
        @param key_replacer: callable (category, old_key, new_key) used
            by L{replace_key}.
        @param canonical_key: callable taking a list of keys, used by
            L{get_canonical_key} (default: the builtin min).

        """
        self.name = name
        self.methods = dict([
            ('set', value_setter),
            ('replace', key_replacer),
            ('canonical', canonical_key),
        ])

    def substitute(self, category, key, value):
        """ Calls the setter and returns whatever it returns. """
        setter = self.methods['set']
        return setter(category, key, value)

    def get_canonical_key(self, keylist):
        """ Returns the result of the canonical-key callable on keylist. """
        chooser = self.methods['canonical']
        return chooser(keylist)

    def replace_key(self, category, old_key, new_key):
        """ Calls the key replacer; its return value is discarded. """
        replacer = self.methods['replace']
        replacer(category, old_key, new_key)
386
class VariableSubstitution(dict):
    """
    A mapping from some sort of variable subject to unification that
    appears in categories.
    The types of variables included in the substitution are defined by
    variable substitutors. It is recommended that you subclass
    variable substitution to provide a type of substitution suitable to a
    formalism.

    The dictionary maps each variable type name to an
    EquivalenceAssignment; the set of type names is fixed at
    instantiation.

    """
    class InvalidVariableTypeError(Exception):
        pass

    class VariableSubstitutionError(Exception):
        pass

    def __init__(self, variable_types):
        """
        @param variable_types: iterable of variable type objects; each
            must have a C{name} attribute, which is used as the
            dictionary key for that type's assignment.

        """
        self.assignments = {}
        self.types = {}
        for variable_type in variable_types:
            # Go straight to dict's setter: our own __setitem__ always raises
            super(VariableSubstitution, self).__setitem__(
                                variable_type.name, EquivalenceAssignment())
            self.types[variable_type.name] = variable_type
        # Once this is set, the equations will never be consistent again
        self._inconsistent = False

    def _is_inconsistent(self):
        # Inconsistent if any single assignment is, or if the flag is set
        if any(assignment.inconsistent for assignment in self.values()):
            return True
        return self._inconsistent
    inconsistent = property(_is_inconsistent)

    def __getitem__(self, type):
        if type in self:
            return super(VariableSubstitution, self).__getitem__(type)
        raise VariableSubstitution.InvalidVariableTypeError(
            "Tried to access assignments to a %s variable, but this "
            "substitution doesn't store that type of assignment." % type)

    def __setitem__(self, type, assignment):
        raise VariableSubstitution.VariableSubstitutionError(
            "Cannot change the variable "
            "types of a variable substitution after instantiation")

    def add_assignment(self, type, key, value):
        """ Constrain the key to take a value that matches this one. """
        type_assignment = self[type]
        type_assignment[key] = value

    def add_equality(self, type, key, other_key):
        """ Constrain the two keys to take the same value. """
        type_assignment = self[type]
        type_assignment.add_equivalence(key, other_key)

    def get_assignment(self, type, key):
        """ Returns the value that this key is constrained to match. """
        type_assignment = self[type]
        return type_assignment[key]

    def get_equalities(self, type, key, pop=False):
        """ Returns all the keys that are constrained to be equal to this one.
        Optionally remove this equivalence class in the process. """
        type_assignment = self[type]
        if pop:
            return type_assignment._pop_class(key)
        return type_assignment._get_class(key)

    def apply(self, category, make_copy=True):
        """ Applies the variable substitution to the given category object.

        Works on a copy of the category unless make_copy is false, and
        returns the category that was modified.
        """
        target = category.copy() if make_copy else category
        for typename, assignment in self.items():
            substitutor = self.types[typename]
            # For every key in the assignment that has a value, apply the
            # setter to set the value of anything with this key in the
            # category to be the assigned value
            for key in assignment.keys():
                substitutor.substitute(target, key, assignment[key])
            # Replace each key remaining with the canonical key for its
            # equivalence class, so that the equivalence is implemented
            # even if there's no assignment to that key.
            for keylist in assignment.classes:
                canonical = substitutor.get_canonical_key(keylist)
                for key in keylist:
                    if key != canonical:
                        substitutor.replace_key(target, key, canonical)
        return target

    def __str__(self):
        entries = ["%s:%s" % (typename, assignment)
                   for typename, assignment in self.items()]
        return "".join(entries)

    def __repr__(self):
        return str(self)
476 477
class UnificationResultBase(object):
    """
    Class for the object returned as the result of unification. This
    just bundles together the various bits of information that you
    might need to get at after performing unification.
    It should be subclassed to add information specific to the formalism.

    """
    def __init__(self, result, constraints, inputs):
        """
        Stores the three arguments, unchanged, as attributes of the
        same names.

        """
        self.result, self.constraints, self.inputs = \
                                        result, constraints, inputs

    def apply_all_mappings(self, obj):
        """
        Most types of unification will require some mapping of variable
        names to be applied to ensure they don't get clobbered. The
        unification result should store all these mappings and supply
        this method to apply them all at once to a category.

        @raise NotImplementedError: always, in this base class.

        """
        class_name = type(self).__name__
        raise NotImplementedError("The unification result class %s has not "
            "supplied an implementation of apply_all_mappings()" % class_name)
499