Package jazzparser :: Package utils :: Module data
[hide private]
[frames] | no frames]

Source Code for Module jazzparser.utils.data

 1  """Utilities relating to data handling and processing. 
 2   
 3  This module does not define datatypes itself. See L{jazzparser.data}  
 4  for that. 
 5   
 6  """ 
 7  """ 
 8  ============================== License ======================================== 
 9   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
10    
11   This file is part of The Jazz Parser. 
12    
13   The Jazz Parser is free software: you can redistribute it and/or modify 
14   it under the terms of the GNU General Public License as published by 
15   the Free Software Foundation, either version 3 of the License, or 
16   (at your option) any later version. 
17    
18   The Jazz Parser is distributed in the hope that it will be useful, 
19   but WITHOUT ANY WARRANTY; without even the implied warranty of 
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
21   GNU General Public License for more details. 
22    
23   You should have received a copy of the GNU General Public License 
24   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
25   
26  ============================ End license ====================================== 
27   
28  """ 
29  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>" 
30   
def hold_out(full, start, end):
    """
    Returns the sliceable sequence C{full} with the items in the range
    C{start} (inclusive) to C{end} (exclusive) left out.

    C{start} may be C{None} or 0 to hold out from the very beginning and
    C{end} may be C{None} to hold out right up to the end. Note that if
    C{start} is C{None} or 0 and C{end} is also C{None}, the result is
    C{full[None:]}, i.e. nothing is removed.

    @param full: sequence supporting slicing (and C{+} when both bounds
        are given)
    @param start: index of the first held-out item, or C{None}
    @param end: index one past the last held-out item, or C{None}
    @return: what remains of C{full} outside the held-out range

    """
    if not (start is None or start == 0):
        # There is something to keep in front of the held-out range
        head = full[:start]
        if end is None:
            # Held-out range runs to the end: only the head survives
            return head
        return head + full[end:]
    # Held-out range starts at the beginning: only the tail survives
    return full[end:]
38
def holdout_partition(input, partitions):
    """
    Partitions the input into the given number of partitions and
    returns a list of subsets of the input with each of the partitions
    excluded. Useful for doing heldout data evaluations.

    The input must support C{len()} and slicing. Each of the first
    C{partitions-1} partitions has C{len(input) // partitions} items;
    the last partition takes everything that's left.

    Note that with C{partitions == 1} the single held-out range is
    passed to L{hold_out} as C{(0, None)}, for which it returns the
    whole input rather than an empty set.

    @param input: sequence to take the held-out subsets from
    @param partitions: number of partitions (a zero value raises
        C{ZeroDivisionError})
    @return: list of C{partitions} subsets of the input, the i-th one
        missing the i-th partition

    """
    # Floor division: the offsets below are used as slice indices, so the
    # partition size must be an int whatever the division semantics
    partition_size = len(input) // partitions
    heldout_sets = [
        hold_out(input, partition_size * index, partition_size * (index + 1))
        for index in range(partitions - 1)
    ]
    # Last partition: throw in everything that's left
    heldout_sets.append(
        hold_out(input, partition_size * (partitions - 1), None))
    return heldout_sets
52
def partition(input, partitions):
    """
    The complement of holdout_partition. Simply splits the input
    n ways.

    The input must support C{len()} and slicing. Each of the first
    C{partitions-1} parts has C{len(input) // partitions} items; the
    last part takes whatever is left, so it may be longer than the
    others. If the input has fewer items than there are partitions,
    all parts but the last are empty.

    @param input: sequence to split
    @param partitions: number of parts to split into (a zero value
        raises C{ZeroDivisionError})
    @return: list of C{partitions} consecutive slices of the input

    """
    # Floor division: the offsets below are used as slice indices, so the
    # partition size must be an int whatever the division semantics
    partition_size = len(input) // partitions
    parts = [
        input[partition_size * index:partition_size * (index + 1)]
        for index in range(partitions - 1)
    ]
    # Last partition: throw what's left in
    parts.append(input[partition_size * (partitions - 1):])
    return parts
65