1 """Generic base CCG syntax classes.
2
3 Defines the internal representation of CCG categories for the Jazz Parser.
4 These should be subclassed in specific formalisms.
5 Everything in these base classes should be behaviour common to all
6 CCG formalisms and defines the core CCG syntactic functionality.
7
8 """
9 """
10 ============================== License ========================================
11 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding
12
13 This file is part of The Jazz Parser.
14
15 The Jazz Parser is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 The Jazz Parser is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>.
27
28 ============================ End license ======================================
29
30 """
31 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"
32
33 from jazzparser.utils.base import filter_latex
34 from jazzparser.utils.domxml import remove_unwanted_elements
35 from jazzparser.data.assignments import EquivalenceAssignment
36 from jazzparser.utils.chords import ChordError, chord_numeral_to_int, int_to_chord_numeral
37 from jazzparser.grammar import GrammarReadError
38 import logging, copy, re
39
40
41 logger = logging.getLogger("main_logger")
42
44 - def __init__(self, formalism, forward, id=0):
48
50 if self.forward:
51 return 1
52 else:
53 return 0
54
56 if self.forward:
57 val = "/"
58 else:
59 val = "\\"
60 if hasattr(self, '_post_string'):
61 val += self._post_string
62 if hasattr(self, '_pre_string'):
63 val = self._pre_string + val
64 return val
65
67 if self.forward:
68 str = "/"
69 else:
70 str = "\\backslash "
71 return filter_latex(str)
72
74 if not ((self.__class__ == other.__class__) and \
75 (self.forward == other.forward)):
76 return False
77 if hasattr(self, '_extra_eq') and not self._extra_eq(other):
78 return False
79 return True
80
82 return not (self == other)
83
86
89
91 """
92 A CCG category and its associated semantics: a CCG sign.
93
94 Keeps a note of which rules have been applied and which other
95 signs they were applied to, so that the parser can avoid re-applying
96 the same rule to the same inputs again.
97
98 """
def __init__(self, formalism, category, semantics, derivation_trace=None):
    """
    Builds a sign from a syntactic category and its semantics, and
    sets up empty records of the rules applied to it.

    @type formalism: L{FormalismBase subclass<FormalismBase>}
    @param formalism: the formalism of the subclass.
    @type category: L{Category}
    @param category: the top level node of the category instance.
    @type semantics: L{Semantics<semantics.lambdacalc.Semantics>}
    @param semantics: the semantics part of the sign
    @type derivation_trace: L{DerivationTrace<jazzparser.data.DerivationTrace>}
    @param derivation_trace: a derivation trace to store how the
        sign was derived (optional).

    """
    self.formalism = formalism
    self.category = category
    self.semantics = semantics
    self.unary_rules_applied = False
    self.derivation_trace = derivation_trace

    # NOTE(review): -1 looks like a "not yet assigned" placeholder;
    # confirm against whatever sets result_index.
    self.result_index = -1

    # Records of rule applications: unary rules are kept in a list;
    # binary rules map each rule to a list of id()s of the other input.
    self._unary_applied = []
    self._binary_applied = {}
123
126
131
134
140
142 return not self.__eq__(other)
143
146
153
164
174
176 """
177 Given a dictionary of features, applies any changes to this
178 sign that need to be made before it is added to the chart on
179 the basis of surface-level features (e.g. pitch, time).
180 """
181 return
182
184 """
185 Must be implemented by subclasses. Adds timing information
186 to components such that the start time of the whole sign
187 is encoded as the given time.
188
189 """
190 raise NotImplementedError, "set_time must be implemented by Sign subclasses."
191
193 """
194 Must be implemented by subclasses. Adds duration information
195 to all components that store durations.
196 """
197 raise NotImplementedError, "set_duration must be implemented by Sign subclasses."
198
200 """
201 Returns True if the given rule instance has been applied to
202 this sign previously in the parse. If the rule is binary,
203 other_input should be given and this sign is assumed to be
204 the leftmore input.
205
206 """
207 if rule.arity == 1:
208 return rule in self._unary_applied
209 else:
210
211 if other_input is None:
212 raise ValueError, "tried to check whether a binary rule "\
213 "has been applied, but didn't give a second input"
214 return rule in self._binary_applied and \
215 id(other_input) in self._binary_applied[rule]
216
218 """
219 Keeps a note that the given rule was applied to this sign. If
220 it is a binary rule, you must also specify what the second
221 input was.
222
223 """
224 if rule.arity == 1:
225 self._unary_applied.append(rule)
226 else:
227 if other_input is None:
228 raise ValueError, "tried to note that a binary rule "\
229 "has been applied, but didn't give a second input"
230 self._binary_applied.setdefault(rule, []).append(id(other_input))
231
232
234 """
235 Parent class of categories (i.e. functional and atomic).
236 """
239
241 return not (self == other)
242
245
248
249
def __init__(self, formalism, result, slash, argument):
    """
    A slash category must be initialised with a pair of categories
    (argument and result) that appear on the right and left of the
    slash (respectively) and a Slash object.

    @param formalism: the formalism, passed on unchanged to the parent
        class's initialiser.
    @param result: the category on the left of the slash.
    @param slash: the Slash object joining the two categories.
    @param argument: the category on the right of the slash.

    """
    super(ComplexCategoryBase, self).__init__(formalism)
    self.result = result
    self.argument = argument
    self.slash = slash
262
264 return hash(self.result) + hash(self.argument) + hash(self.slash)
265
267 out_string = "("
268 out_string += str(self.result)
269 out_string += str(self.slash)
270 out_string += str(self.argument)
271 out_string += ")"
272 return out_string
273
275 out_string = "("
276 out_string += self.result.to_latex()
277 out_string += self.slash.to_latex()
278 out_string += self.argument.to_latex()
279 out_string += ")"
280 return out_string
281
283 return (other.__class__ == self.__class__) and \
284 (other.result == self.result) and \
285 (other.argument == self.argument) and \
286 (other.slash == self.slash)
287
293
295 """
296 Get a set of the ids on the slashes in this category.
297 """
298 return self.argument.slash_ids | self.result.slash_ids | set([self.slash.id])
299 slash_ids = property(_get_slash_ids)
300
306
308 """
309 Much of the implementation of an atomic category is left to
310 subclasses, since this is where the most formalism-dependence is.
311 """
314
317
320
323
326
329 slash_ids = property(_get_slash_ids)
330
333
335 """
336 A category type with no combinatorial power at all. This should never be
337 used in derivations, but supplies something to put in the category part
338 of a sign that has only a semantics (e.g. one that comes from a backoff
339 model).
340
341 """
344
347
350
353
356
357 slash_ids = set()
360
362 """
363 An instance of VariableSubstitutor defines a type of variable forming
364 a component of categories. The instance defines how this variable type
365 is accessed and set, given a category and a key.
366 """
def __init__(self, name, value_setter, key_replacer, canonical_key=min):
    """
    Stores the name of the variable type and the callables that
    implement its operations.

    @param name: name identifying this type of variable.
    @param value_setter: callable stored under C{'set'}.
        NOTE(review): presumably called as (category, key, value);
        the call site is not visible here, so confirm.
    @param key_replacer: callable stored under C{'replace'}; it is
        called as C{key_replacer(category, old_key, new_key)}.
    @param canonical_key: callable stored under C{'canonical'}; it is
        called with a list of keys and picks the one to keep.
        Defaults to the builtin C{min}.

    """
    self.name = name
    self.methods = {
        'set': value_setter,
        'replace': key_replacer,
        'canonical': canonical_key,
    }
374
378 return self.methods['canonical'](keylist)
380 self.methods['replace'](category, old_key, new_key)
381
386
388 """
389 A mapping from some sort of variable subject to unification that
390 appears in categories.
391 The types of variables included in the substitution are defined by
392 variable substitutors. It is recommended that you subclass
393 variable substitution to provide a type of substitution suitable to a
394 formalism.
395 """
404
406 for ass in self.values():
407 if ass.inconsistent:
408 return True
409 return self._inconsistent
410 inconsistent = property(_is_inconsistent)
411
418
422
424 """ Constrain the key to take a value that matches this one. """
425 self[type][key] = value
426
428 """ Constrain the two keys to take the same value. """
429 self[type].add_equivalence(key, other_key)
430
432 """ Returns the value that this key is constrained to match. """
433 return self[type][key]
434
436 """ Returns all the keys that are constrained to be equal to this one.
437 Optionally remove this equivalence class in the process. """
438 if pop:
439 return self[type]._pop_class(key)
440 else:
441 return self[type]._get_class(key)
442
def apply(self, category, make_copy=True):
    """
    Applies the variable substitution to the given category object.

    For every variable type held in this substitution, each key that
    has a value assigned is first substituted into the category; then
    every key of each equivalence class is replaced by that class's
    canonical key.

    @param category: the category to apply the substitution to. It
        only needs to provide C{copy()} when C{make_copy} is true.
    @type make_copy: bool
    @param make_copy: if true (the default) the substitution is applied
        to C{category.copy()}; otherwise to the given object itself.
    @return: the category the substitution was applied to (the copy,
        or the given object if C{make_copy} is false).

    """
    if make_copy:
        category = category.copy()
    for typename, assignment in self.items():
        # Called "substitutor" rather than "type" so that the builtin
        # is not shadowed inside this method.
        substitutor = self.types[typename]
        # Put in the assigned values before any keys get renamed.
        for key in assignment.keys():
            substitutor.substitute(category, key, assignment[key])
        # Collapse each equivalence class onto its canonical key.
        for keylist in assignment.classes:
            canonical = substitutor.get_canonical_key(keylist)
            for key in keylist:
                if key != canonical:
                    substitutor.replace_key(category, key, canonical)
    return category
465
467 return "".join(["%s:%s" % (type, ass) for type,ass in self.items()])
468
471
476
477
479 """
480 Class for the object returned as the result of unification. This
481 just bundles together the various bits of information that you
482 might need to get at after performing unification.
483 It should be subclassed to add information specific to the formalism.
484 """
def __init__(self, result, constraints, inputs):
    """
    Bundles together the outcome of a unification.

    @param result: stored as C{self.result}.
        NOTE(review): presumably the unified category; confirm
        against the callers.
    @param constraints: stored as C{self.constraints}.
        NOTE(review): presumably the variable constraints gathered
        during unification; confirm.
    @param inputs: stored as C{self.inputs}.
        NOTE(review): presumably the categories that were unified;
        confirm.

    """
    self.result = result
    self.constraints = constraints
    self.inputs = inputs
489
491 """
492 Most types of unification will require some mapping of variable
493 names to be applied to ensure they don't get clobbered. The
494 unification result should store all these mappings and supply
495 this method to apply them all at once to a category.
496 """
497 raise NotImplementedError, "The unification result class %s has not "\
498 "supplied an implementation of apply_all_mappings()" % type(self).__name__
499