1 """
2 Building derivation trees from annotated chord sequences.
3
4 Trees are minimally represented in the database. Each chord may define
5 certain pieces of additional tree information which influence the
6 way its tree structure is built.
7
8 A default tree structure is defined by a left-to-right parsing algorithm,
9 which applies rules as it may in a particular order. The algorithm does
10 not parse properly, but uses only the slash direction information to
11 decide what rule to apply.
12 Essentially this results in trees in which slash categories are always
13 composed as soon as possible and then applied. Consecutive atomic
14 categories must unambiguously be combined by continuation.
15 Coordination will never be used by the default trees. A minimal amount
16 of information (the end of each coordinated constituent) can be
17 specified on the chords to prompt the tree to use coordination. The
18 first constituent is always as long as possible (which is a reasonable
19 arbitrary canonical tree).
20
21 Note that this should not operate directly on sequences read from the
22 database, but on their mirrors (see jazzparser.data.db_mirrors), since
23 this allows it potentially to be used independently of the database.
24
25 """
26 """
27 ============================== License ========================================
28 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding
29
30 This file is part of The Jazz Parser.
31
32 The Jazz Parser is free software: you can redistribute it and/or modify
33 it under the terms of the GNU General Public License as published by
34 the Free Software Foundation, either version 3 of the License, or
35 (at your option) any later version.
36
37 The Jazz Parser is distributed in the hope that it will be useful,
38 but WITHOUT ANY WARRANTY; without even the implied warranty of
39 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
40 GNU General Public License for more details.
41
42 You should have received a copy of the GNU General Public License
43 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>.
44
45 ============================ End license ======================================
46
47 """
48 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"
49
50 import jazzparser.settings as jazzsettings
51 from jazzparser.grammar import get_grammar
52 from jazzparser.utils.strings import strs
53
55 """
56 Partially represents a CCG category in a simple way. This only
57 contains the information that's important to inferring the tree
58 structure. It's a heavily abstracted form of CCG.
59
60 """
61 tree = None
62
65 complex = property(is_complex)
66
69
71 - def __init__(self, result, forward, argument):
72 self.result = result
73 self.forward = forward
74 self.argument = argument
75
77 return "%s%s%s" % (self.result, self.forward and "/" or "\\", self.argument)
78
84
86 return type(self) == type(other) and \
87 self.result == other.result and \
88 self.argument == other.argument and \
89 self.forward == other.forward
90
103
105 """
106 The categories of some chords are unknown and not specified in the
107 annotation. We still need to parse the rest of the sequence, so
108 we just leave this in the tree as a marker of the unknown category.
109
110 """
113
116
120
122 """
123 Special type to place markers among the categories in the stack.
124
125 """
126 pass
127
129 """
130 Marks the end of the first part of a coordination.
131
132 """
135 __repr__ = __str__
136
137
139 """
140 Superclass of nodes in syntactic trees. These classes are used to
141 represent the trees that we build using basic category information
142 with minimal manually-specified structural ambiguity resolution.
143
144 """
145 pass
146
148 """
149 An internal node in a syntactic tree.
150
151 """
153 self.children = children
154 self.rule = rule
155
157 return "(%s)%s" % (" ".join(["%s" % c for c in self.children]),self.rule)
158
161 span_length = property(_get_span_length)
162
164 """
165 A terminal node (leaf) in a syntactic tree.
166
167 """
168 - def __init__(self, chord, category=None):
171
173 return str(self.chord)
174
175 span_length = 1
176
178 """
179 The root of a syntactic tree. Note that this may be a parent node
180 of multiple tree unrelated by rule applications, if the derivation
181 did not build a single tree.
182
183 """
184 - def __init__(self, children, shift_reduce=None):
185 self.children = children
186 self.shift_reduce = shift_reduce
187
189 return "Trees:%s" % " | ".join([str(tree) for tree in self.children])
190
191
192
193 """
194 These are a very simple form of the grammatical rules we use in the
195 Music Keyspan formalism. They just do category structure checks.
196
197 """
199 """
200 Attach a non-terminal tree to a category on the basis of the inputs
201 from which it was built.
202 """
203 cat.tree = SyntacticNonTerminal([c.tree for c in inputs], rule)
204
206 """ Generic composition (use compf or compb). """
207 if len(stack) < 2:
208 return False
209 if not isinstance(stack[-1], GeneralizedCategory) or \
210 not isinstance(stack[-2], GeneralizedCategory):
211 return False
212 if not isinstance(stack[-2], SlashCategory) or \
213 not isinstance(stack[-1], SlashCategory) or \
214 not stack[-2].forward == dir or not stack[-1].forward == dir:
215 return False
216 else:
217 cat1 = stack.pop()
218 cat0 = stack.pop()
219 stack.append(SlashCategory(cat0.result.copy(), dir, cat1.argument.copy()))
220 attach_tree(stack[-1], [cat0, cat1], dir and "compf" or "compb")
221 return True
222
224 """ Forward composition """
225 return _comp(True, stack)
226 compf.name = "compf"
227
229 """ Backward composition """
230 return _comp(False, stack)
231 compb.name = "compb"
232
234 """ Forward application """
235 if len(stack) < 2:
236 return False
237 if not isinstance(stack[-1], GeneralizedCategory) or \
238 not isinstance(stack[-2], GeneralizedCategory):
239 return False
240 if not isinstance(stack[-2], SlashCategory) or \
241 not stack[-2].forward or \
242 not stack[-2].argument == stack[-1]:
243 return False
244 else:
245 cat1 = stack.pop()
246 cat0 = stack.pop()
247 stack.append(cat0.result.copy())
248 attach_tree(stack[-1], [cat0, cat1], "appf")
249 return True
250 appf.name = "appf"
251
253 """ Backward application """
254 if len(stack) < 2:
255 return False
256 if not isinstance(stack[-1], GeneralizedCategory) or \
257 not isinstance(stack[-2], GeneralizedCategory):
258 return False
259 if not isinstance(stack[-1], SlashCategory) or \
260 stack[-1].forward or \
261 not stack[-1].argument == stack[-2]:
262 return False
263 else:
264 cat1 = stack.pop()
265 cat0 = stack.pop()
266 stack.append(cat1.result.copy())
267 attach_tree(stack[-1], [cat0, cat1], "appb")
268 return True
269 appb.name = "appb"
270
287 cont.name = "cont"
288
290 """ Cadence coordination """
291
292 if len(stack) < 3:
293 return False
294 if not isinstance(stack[-1], GeneralizedCategory) or \
295 not isinstance(stack[-3], GeneralizedCategory) or \
296 not isinstance(stack[-2], CoordinationMiddleMarker):
297 return False
298 if not isinstance(stack[-3], SlashCategory) or \
299 not isinstance(stack[-1], SlashCategory) or \
300 not stack[-3].forward or not stack[-1].forward:
301 return False
302 else:
303 cat1 = stack.pop()
304 marker = stack.pop()
305 cat0 = stack.pop()
306 stack.append(cat0.copy())
307 attach_tree(stack[-1], [cat0, cat1], "coord")
308 return True
309 coord.name = "coord"
310
311
312
339
340
342 """
343 Run through the motions of parsing the sequence in order to build
344 its tree structure. Most of the structure is implicit in the
345 lexical categories. Additional information is given in the TreeInfo
346 model, associated with chords.
347
348 """
349
350 if grammar is None:
351 grammar = get_grammar()
352
353 if logger is None:
354 def _log(*args):
355 pass
356 else:
357 def _log(string, *args):
358 string = string % args
359 logger.info(string)
360
361 input = []
362 shift_reduce = []
363
364 categories = []
365 for chord in sequence.iterator():
366
367 if chord.category is None or chord.category == "":
368 category = None
369 cat_name = None
370 else:
371 if chord.category not in grammar.families:
372 raise TreeBuildError, "Could not find the category %s in "\
373 "the lexicon" % chord.category
374
375
376 category = grammar.families[chord.category][0].entries[0].sign.category
377 cat_name = chord.category
378
379 gen_cat = generalize_category(category, grammar.formalism)
380
381 gen_cat.tree = SyntacticTerminal(chord, category=cat_name)
382 input.append(gen_cat)
383 categories.append("%s <= %s" % (chord,category))
384 _log("CATEGORIES %s", categories)
385
386 input = list(reversed(input))
387 stack = []
388 rules = [ compf, compb, appf, appb, cont ]
389
390 while len(input) > 0:
391
392 shift_reduce.append("S")
393 stack.append(input.pop())
394 if debug_stack:
395 print stack
396 _log("SHIFT stack = %s, input = %s", stack, input)
397
398
399 coord_unresolved = False
400 coord_resolved = False
401 if stack[-1].tree.chord.treeinfo.coord_unresolved:
402
403
404 coord_unresolved = True
405 if stack[-1].tree.chord.treeinfo.coord_resolved:
406
407
408 coord_resolved = True
409
410
411
412 changed = True
413 while changed:
414 changed = False
415
416 for rule in rules:
417 res = rule(stack)
418 if res:
419 shift_reduce.append("R(%s)" % rule.name)
420 changed = True
421 _log("REDUCE %s, stack = %s", rule.name, stack)
422
423 if coord_resolved:
424
425 coord(stack)
426 if coord_unresolved:
427
428
429 stack.append(CoordinationMiddleMarker())
430 for cat in stack:
431 if isinstance(cat, CoordinationMiddleMarker):
432 raise TreeBuildError, "Coordination middle marker not "\
433 "matched by an end marker. Stack: %s" % strs(stack, ", ")
434 tree = SyntacticTreeRoot([cat.tree for cat in stack], shift_reduce=shift_reduce)
435 return tree
436
438 """
439 Given a syntactic tree using the SyntacticTreeNode classes,
440 produces an NLTK tree.
441 This is useful, for example, to display a tree: tree.draw().
442 You need NLTK installed to be able to do this.
443
444 """
445 from jazzparser.utils.base import load_optional_package
446 tree = load_optional_package('nltk.tree', 'NLTK', 'doing NLTK tree operations')
447 def _node_to_nltk_tree(node):
448 if isinstance(node, SyntacticNonTerminal):
449
450 children = []
451 for child in node.children:
452 children.append(_node_to_nltk_tree(child))
453 return tree.Tree("%s" % node.rule, children)
454 elif isinstance(node, SyntacticTreeRoot):
455
456 children = []
457 for child in node.children:
458 children.append(_node_to_nltk_tree(child))
459 return tree.Tree("Root", children)
460 elif isinstance(node, SyntacticTerminal):
461
462 leaf = tree.Tree("%s" % node.chord, [])
463 if node.category is not None:
464
465 leaf = tree.Tree("%s" % node.category, [leaf])
466 return leaf
467 return _node_to_nltk_tree(intree)
468
469
472