1 """Representation of trees as balanced sequences.
2
3 This representation is used by Lozano and Valiente, 2004 (On the Maximum
4 Common Embedded Subtree Problem for Ordered Trees).
5
6 We currently only support conversion of I{unlabeled} trees to balanced
7 sequences, though Lozano and Valiente do mention how their algorithms could
8 be extended to labeled trees.
9
10 """
11 """
12 ============================== License ========================================
13 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding
14
15 This file is part of The Jazz Parser.
16
17 The Jazz Parser is free software: you can redistribute it and/or modify
18 it under the terms of the GNU General Public License as published by
19 the Free Software Foundation, either version 3 of the License, or
20 (at your option) any later version.
21
22 The Jazz Parser is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 GNU General Public License for more details.
26
27 You should have received a copy of the GNU General Public License
28 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>.
29
30 ============================ End license ======================================
31
32 """
33 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"
36 """
37 Elements should be just 0s and 1s.
38
39 """
41 self._items = sequence
42
48
50 return len(self._items)
51
53 return "".join(str(i) for i in self)
54
57
60
62 return self._items == other._items
63
65
66
67
68 return int("".join(str(i) for i in self), 2)
69
71 """
72 Checks that the balanced sequence is in fact balanced. For efficiency,
73 this check is not carried out every time a sequence is created. You
74 should make sure, therefore, to call this whenever you might generate
75 an ill-formed sequence (e.g. when accepting input).
76
77 """
78
79 if self._items.count(0) != self._items.count(1):
80 raise UnbalancedSequenceError, "unmatched brackets in sequence %s" \
81 % self
82
84 """ Find the balanced subsequence beginning at i. """
85 if self[i] != 0:
86 raise ValueError, "balanced subsequence must begin with a 0. "\
87 "Cannot get a balanced sequence beginning at %dth item in %s" \
88 % (i, self)
89 nesting = 1
90 cursor = i
91 while nesting > 0:
92 cursor += 1
93 if self[cursor] == 0:
94 nesting += 1
95 else:
96 nesting -= 1
97 return BalancedSequence(self._items[i:cursor+1])
98
100 """ Pull the head out of the sequence """
101 if len(self) == 0:
102 raise EmptySequenceError, "cannot get the head of an empty sequence"
103 first_tree = self.balance(0)
104
105 return first_tree[1:-1]
106
108 """
109 Splits the sequence into its head and tail. Since the head has to
110 be computed in order to get the tail, it's best to do these at the
111 same time and throw away the head if you don't need it.
112
113 """
114
115 head = self.head()
116
117
118 tail = self[len(head)+2:]
119 return head,tail
120
121 @staticmethod
122 - def cat(head, tail):
123 """
124 Produce a balanced sequence that is the result of taking C{head} as
125 the head and C{tail} as the tail. This is equivalent to::
126 0<head>1<tail>
127
128 """
129 return BalancedSequence([0]+head._items+[1]+tail._items)
130
132 """
133 Returns a set containing the composition of the balanced sequence.
134 This is as defined in definition 4 of the paper, I{decomp(s)}.
135
136 """
137 return set(self._list_decompose())
138
158
159 @staticmethod
161 """
162 Converts an unlabeled tree representation to its equivalent
163 balanced sequence. If there are labels on the tree, they will just
164 be ignored.
165
166 """
167 def _from_node(node):
168 if len(node.children) == 0:
169
170 return []
171 else:
172 node_seqs = []
173 for child in node.children:
174
175 node_seqs.append(0)
176
177 node_seqs.extend(_from_node(child))
178
179 node_seqs.append(1)
180 return node_seqs
181
182
183 seq = _from_node(tree.root)
184
185 return BalancedSequence(seq)
186
188 """
189 Creates an unlabeled tree from the balanced sequence.
190
191 """
192 from .datastructs import Node, ImmutableTree
193 unode = Node.unode
194
195
196 self.check()
197
198 stack = []
199 current_node = unode()
200
201
202 for val in self:
203 if val == 0:
204
205
206 stack.append(current_node)
207
208 new_node = unode()
209 current_node.children.append(new_node)
210
211 current_node = new_node
212 else:
213
214
215 current_node = stack.pop()
216
217
218 assert len(stack) == 0
219
220 return ImmutableTree(current_node)
221
224
227