Package jazzparser :: Package misc :: Package tree :: Module balancedseq
[hide private]
[frames] | no frames]

Source Code for Module jazzparser.misc.tree.balancedseq

  1  """Representation of trees as balanced sequences. 
  2   
  3  This representation is used by Lozano and Valiente, 2004 (On the Maximum  
  4  Common Embedded Subtree Problem for Ordered Trees). 
  5   
  6  We currently only support conversion of I{unlabeled} trees to balanced  
  7  sequences, though Lozano and Valiente do mention how their algorithms could  
  8  be extended to labeled trees. 
  9   
 10  """ 
 11  """ 
 12  ============================== License ======================================== 
 13   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
 14    
 15   This file is part of The Jazz Parser. 
 16    
 17   The Jazz Parser is free software: you can redistribute it and/or modify 
 18   it under the terms of the GNU General Public License as published by 
 19   the Free Software Foundation, either version 3 of the License, or 
 20   (at your option) any later version. 
 21    
 22   The Jazz Parser is distributed in the hope that it will be useful, 
 23   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 24   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 25   GNU General Public License for more details. 
 26    
 27   You should have received a copy of the GNU General Public License 
 28   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
 29   
 30  ============================ End license ====================================== 
 31   
 32  """ 
 33  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"  
34 35 -class BalancedSequence(object):
36 """ 37 Elements should be just 0s and 1s. 38 39 """
40 - def __init__(self, sequence):
41 self._items = sequence
42
43 - def __getitem__(self, i):
44 if type(i) == slice: 45 # Slice of a BalancedSequence should be a BalancedSequence itself 46 return BalancedSequence(self._items[i]) 47 return self._items[i]
48
49 - def __len__(self):
50 return len(self._items)
51
52 - def __str__(self):
53 return "".join(str(i) for i in self)
54
55 - def __repr__(self):
56 return str(self)
57
58 - def __add__(self, seq):
59 return BalancedSequence(self._items + seq._items)
60
61 - def __eq__(self, other):
62 return self._items == other._items
63
64 - def __hash__(self):
65 # Convert to a binary number 66 # This doesn't give a unique id to every sequence, since some items 67 # may constitute zero-padding 68 return int("".join(str(i) for i in self), 2)
69
70 - def check(self):
71 """ 72 Checks that the balanced sequence is in fact balanced. For efficiency, 73 this check is not carried out every time a sequence is created. You 74 should make sure, therefore, to call this whenever you might generate 75 an ill-formed sequence (e.g. when accepting input). 76 77 """ 78 # Check that every 0 is matched by a 1 79 if self._items.count(0) != self._items.count(1): 80 raise UnbalancedSequenceError, "unmatched brackets in sequence %s" \ 81 % self
82
83 - def balance(self, i):
84 """ Find the balanced subsequence beginning at i. """ 85 if self[i] != 0: 86 raise ValueError, "balanced subsequence must begin with a 0. "\ 87 "Cannot get a balanced sequence beginning at %dth item in %s" \ 88 % (i, self) 89 nesting = 1 90 cursor = i 91 while nesting > 0: 92 cursor += 1 93 if self[cursor] == 0: 94 nesting += 1 95 else: 96 nesting -= 1 97 return BalancedSequence(self._items[i:cursor+1])
98
99 - def head(self):
100 """ Pull the head out of the sequence """ 101 if len(self) == 0: 102 raise EmptySequenceError, "cannot get the head of an empty sequence" 103 first_tree = self.balance(0) 104 # Strip the outermost embedding from this 105 return first_tree[1:-1]
106
107 - def head_tail(self):
108 """ 109 Splits the sequence into its head and tail. Since the head has to 110 be computed in order to get the tail, it's best to do these at the 111 same time and throw away the head if you don't need it. 112 113 """ 114 # First get the head 115 head = self.head() 116 # Everything after the head is the tail 117 # The head had its outer level of nesting stripped, so we add 2 back on 118 tail = self[len(head)+2:] 119 return head,tail
120 121 @staticmethod
122 - def cat(head, tail):
123 """ 124 Produce a balanced sequence that is the result of taking C{head} as 125 the head and C{tail} as the tail. This is equivalent to:: 126 0<head>1<tail> 127 128 """ 129 return BalancedSequence([0]+head._items+[1]+tail._items)
130
131 - def decompose(self):
132 """ 133 Returns a set containing the composition of the balanced sequence. 134 This is as defined in definition 4 of the paper, I{decomp(s)}. 135 136 """ 137 return set(self._list_decompose())
138
139 - def _list_decompose(self):
140 """ 141 Instead of doing all our operations on sets, do the recursive 142 computation on lists and then convert to a set. 143 144 """ 145 # First include ourselves 146 decomp = [self] 147 # Split into head and tail 148 head,tail = self.head_tail() 149 # Include the decomposition of head, tail and head~tail 150 # These include the sequences themselves 151 if len(head): 152 decomp.extend(head._list_decompose()) 153 if len(tail): 154 decomp.extend(tail._list_decompose()) 155 if len(head) and len(tail): 156 decomp.extend((head+tail)._list_decompose()) 157 return decomp
158 159 @staticmethod
160 - def from_tree(tree):
161 """ 162 Converts an unlabeled tree representation to its equivalent 163 balanced sequence. If there are labels on the tree, they will just 164 be ignored. 165 166 """ 167 def _from_node(node): 168 if len(node.children) == 0: 169 # Leaf node: empty sequence 170 return [] 171 else: 172 node_seqs = [] 173 for child in node.children: 174 # Mark the start of the subtree by 0 175 node_seqs.append(0) 176 # Recurse 177 node_seqs.extend(_from_node(child)) 178 # Mark the end of the subtree by 1 179 node_seqs.append(1) 180 return node_seqs
181 182 # Get a sequence of 0s and 1s for the tree 183 seq = _from_node(tree.root) 184 # Make a balanced sequence from this 185 return BalancedSequence(seq)
186
187 - def to_tree(self):
188 """ 189 Creates an unlabeled tree from the balanced sequence. 190 191 """ 192 from .datastructs import Node, ImmutableTree 193 unode = Node.unode 194 195 # Make sure we're balanced, or else this won't work 196 self.check() 197 198 stack = [] 199 current_node = unode() 200 201 # Work through the sequence, spawning nodes as appropriate 202 for val in self: 203 if val == 0: 204 # This means a branch to a subtree 205 # Put the current node on the stack, so we can trace back up 206 stack.append(current_node) 207 # Spawn a new node as a child of the current node 208 new_node = unode() 209 current_node.children.append(new_node) 210 # Carry on down to this node's subtree 211 current_node = new_node 212 else: 213 # End of a subtree 214 # Move back up the tree to the node on top of the stack 215 current_node = stack.pop() 216 217 # If the tree was balanced, the stack should now be empty 218 assert len(stack) == 0 219 # The current node should be the root now 220 return ImmutableTree(current_node)
221
222 -class EmptySequenceError(Exception):
223 pass
224
225 -class UnbalancedSequenceError(Exception):
226 pass
227