Package jazzparser :: Package misc :: Package tree :: Module datastructs
[hide private]
[frames] | no frames]

Source Code for Module jazzparser.misc.tree.datastructs

  1  """Data structures for representing generic trees. 
  2   
  3  These are for applying generic algorithms to. 
  4  Note that the trees represented here are ordered trees: their children are  
  5  ordered. They also contain some methods relating to unordered interpretation,  
  6  but take care if you want to implement an algorithm on unordered trees that  
  7  all your operations are unordered. 
  8   
  9  """ 
 10  """ 
 11  ============================== License ======================================== 
 12   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
 13    
 14   This file is part of The Jazz Parser. 
 15    
 16   The Jazz Parser is free software: you can redistribute it and/or modify 
 17   it under the terms of the GNU General Public License as published by 
 18   the Free Software Foundation, either version 3 of the License, or 
 19   (at your option) any later version. 
 20    
 21   The Jazz Parser is distributed in the hope that it will be useful, 
 22   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 23   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 24   GNU General Public License for more details. 
 25    
 26   You should have received a copy of the GNU General Public License 
 27   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
 28   
 29  ============================ End license ====================================== 
 30   
 31  """ 
 32  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"  
33 34 -class BaseTree(object):
35 """ 36 Base class for representing trees. Don't instantiate this: use 37 L{ImmutableTree} or L{MutableTree}. 38 39 """
40 - def __init__(self, root):
41 self.root = root
42
43 - def __getitem__(self, index):
44 # As a shorthand, let tree[i] point straight to tree.root[i] 45 return self.root[index]
46
47 - def __len__(self):
48 """ Number of nodes in total. """ 49 return len(self.root.all_nodes())
50
51 - def postorder(self):
52 """ 53 Returns a postorder list of all the nodes in this node's subtree, 54 including itself. 55 56 """ 57 return self.root.postorder()
58
59 - def postorder_index(self, node):
60 """ 61 Returns the index of this node in the tree according to a postorder 62 ordering. This is inefficient, because it has to recompute the 63 postorder every time. It is more efficient on L{ImmutableTree}, 64 because it can precompute the postorder. 65 66 Note that it is looking for a node by identity, not equality. 67 68 @raise ValueError: if the node is not found in the graph. 69 70 """ 71 try: 72 return [id(n) for n in self.postorder()].index(id(node)) 73 except ValueError: 74 # Give a more informative error 75 raise ValueError, "node '%s' (%d) not in tree" % (node, id(node))
76
77 - def find_parent(self, node):
78 """ 79 Searches for the by of the given node (by identity). Returns the parent, 80 or None if the node is not found. 81 82 """ 83 return self.root.find_parent(node)
84
85 - def replace_node(self, postorder_index, new_node):
86 """ 87 Removes the node and the given postorder index and replaces it 88 with another node. 89 90 """ 91 # Get the node itself that we're interested in 92 node = self.postorder()[postorder_index] 93 # Search for it in the tree to find its parent 94 parent = self.find_parent(node) 95 if parent is None: 96 return False 97 else: 98 i = [id(c) for c in parent.children].index(id(node)) 99 parent.children.pop(i) 100 parent.children.insert(i, new_node)
101
102 - def __str__(self):
103 return "%s" % self.root
104
105 - def __repr__(self):
106 return "%s:%s" % (type(self).__name__, self.root)
107
108 - def __eq__(self, other):
109 return isinstance(other, BaseTree) and self.root == other.root
110
111 - def unordered_equal(self, other):
112 return isinstance(other, BaseTree) and self.root.unordered_equal(other.root)
113
114 - def copy(self):
115 """ Deep copy """ 116 root = self.root.copy() 117 # Create a new tree with this root (using whatever type we were before) 118 return type(self)(root)
119
120 - def can_embed_subtree(self, node):
121 return self.root.can_embed_subtree(node)
122
123 124 -class Node(object):
125 """ 126 Node of a tree. 127 128 """
129 - def __init__(self, label, *args):
130 self.label = label 131 self.children = list(args)
132 133 @staticmethod
134 - def unode(*args):
135 """ Creates an unlabeled node. Label will be set to C{None}. """ 136 return Node(None, *args)
137
138 - def is_leaf(self):
139 return len(self.children) == 0
140
141 - def __len__(self):
142 """ Returns number of children """ 143 return len(self.children)
144
145 - def all_nodes(self):
146 return [self] + sum([child.all_nodes() for child in self.children], [])
147
148 - def __str__(self):
149 string = "" 150 # Represent this node 151 if self.label is None: 152 string += "." 153 else: 154 string += "%s" % str(self.label) 155 156 if len(self.children): 157 # Represent the children 158 child_strings = [] 159 for child in self.children: 160 child_strings.append(str(child)) 161 string += "(%s)" % " ".join(child_strings) 162 return string
163
164 - def __repr__(self):
165 if self.label is None: 166 return "<Node>" 167 else: 168 return "<Node '%s'>" % str(self.label)
169
170 - def __getitem__(self, index):
171 return self.children[index]
172
173 - def find_parent(self, node):
174 """ 175 Searches for the by of the given node (by identity). Returns all of 176 the parents of that node we can find. 177 178 """ 179 if id(node) in [id(n) for n in self.children]: 180 return self 181 else: 182 for child in self.children: 183 found = child.find_parent(node) 184 if found is not None: 185 return found 186 return None
187
188 - def postorder(self):
189 """ 190 Returns a postorder list of all the nodes in this node's subtree, 191 including itself. 192 193 """ 194 descs = [] 195 # Get nodes from all the children first 196 for child in self.children: 197 descs.extend(child.postorder()) 198 descs.append(self) 199 return descs
200
201 - def __eq__(self, other):
202 return type(other) == Node and \ 203 self.label == other.label and \ 204 len(self.children) == len(other.children) and \ 205 all(self.children[i] == other.children[i] for i in range(len(self.children)))
206
207 - def unordered_equal(self, other):
208 """ 209 Our tree representation preserves the order of children of a node. 210 Two trees are only equal according to __eq__ if their child ordering 211 is the same at every node. This method checks if the two can be 212 considered equal if the same ordering is not required. 213 214 """ 215 # We still require all this to hold 216 if type(other) != Node or self.label != other.label or \ 217 len(self.children) != len(other.children): 218 return False 219 # We need to match every child, but not in the same order 220 for child in self.children: 221 # Look for a match in the other 222 if not any(child.unordered_equal(other_child) for \ 223 other_child in other.children): 224 # No match found anywhere in the children 225 return False 226 # All children matched some child of other 227 return True
228
229 - def copy(self):
230 children = [child.copy() for child in self.children] 231 return Node(self.label, *children)
232
233 - def leftmost_leaf(self):
234 """ Returns the leftmost leaf of this subtree. """ 235 if self.is_leaf(): 236 return self 237 else: 238 return self.children[0].leftmost_leaf()
239
240 - def rightmost_leaf(self):
241 """ Returns the rightmost leaf of this subtree. """ 242 if self.is_leaf(): 243 return self 244 else: 245 return self.children[-1].rightmost_leaf()
246
247 248 -class ImmutableTree(BaseTree):
249 """ 250 Tree data structure. This is immutable and assumes that the data structure 251 will never be changed. Don't try to change it, as this will violate 252 key assumptions. Instead, convert to a mutable tree. 253 254 Immutability is not enforced at all, just assumed. 255 256 The advantage of using this over the mutable tree is that certain things, 257 like postorder numbers, can be precomputed, since we know the graph won't 258 change. 259 260 """
261 - def __init__(self, root):
262 # Make sure the whole tree is immutable 263 BaseTree.__init__(self, root) 264 if not isinstance(root, Node): 265 raise TypeError, "ImmutableTree must have a Node as a root: got "\ 266 "%s" % (type(root).__name__) 267 # Precompute the postorder of the nodes 268 self._postorder = super(ImmutableTree, self).postorder() 269 self._postorder_ids = [id(n) for n in self._postorder] 270 # Precompute the descendents of every node 271 self._descendents = {} 272 for node in self._postorder: 273 self._descendents[id(node)] = node.postorder()
274
275 - def mutable(self):
276 """ Get a mutable version of the graph. """ 277 return MutableTree(self.root.copy())
278
279 - def immutable(self):
280 return self
281
282 - def postorder(self):
283 # No need to recompute this - we've already done it 284 return self._postorder
285
286 - def postorder_index(self, node):
287 # Inherit doc 288 try: 289 return self._postorder_ids.index(id(node)) 290 except ValueError: 291 # Give a more informative error 292 raise ValueError, "node '%s' (%d) not in tree" % (node, id(node))
293
294 - def descendents(self, node):
295 """ 296 ImmutableTree can precompute the descendents of every node, because we 297 know they won't change. 298 299 """ 300 if id(node) not in self._descendents: 301 raise ValueError, "node '%s' (%d) not in tree" % (node, id(node)) 302 else: 303 return self._descendents[id(node)]
304
305 -class MutableTree(BaseTree):
306 """ 307 Normal mutable tree data structure. This should be used for most purposes. 308 309 """
310 - def __init__(self, root):
311 BaseTree.__init__(self, root)
312
313 - def immutable(self):
314 """ Get an immutable version of the graph. """ 315 return ImmutableTree(self.root.copy())
316
317 - def mutable(self):
318 return self
319