1 """Utilities relating to data handling and processing.
2
3 This module does not define datatypes itself. See L{jazzparser.data}
4 for that.
5
6 """
7 """
8 ============================== License ========================================
9 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding
10
11 This file is part of The Jazz Parser.
12
13 The Jazz Parser is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 The Jazz Parser is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>.
25
26 ============================ End license ======================================
27
28 """
29 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"
30
def hold_out(full, start, end):
    """
    Returns a copy of the sequence C{full} with the slice
    C{full[start:end]} removed.

    NOTE(review): the C{def} line was missing from the reviewed text.
    The signature is reconstructed from the call sites in this module,
    which pass C{(sequence, start, end)} positionally; confirm it.

    @param full: sliceable sequence to remove a span from. In the general
        case the two remaining pieces are joined with C{+}.
    @param start: index of the first held-out item. C{None} or 0 means
        the held-out span begins at the start of the sequence.
    @param end: index one past the last held-out item. C{None} means the
        held-out span runs to the end of the sequence.
    @return: the items of C{full} that lie outside C{[start:end]}.

    """
    from_beginning = start is None or start == 0
    if from_beginning and end is None:
        # The whole sequence is held out, so nothing remains. Without
        # this case full[None:] would hand back the entire input.
        return full[:0]
    if from_beginning:
        # Nothing precedes the held-out span: keep only the tail.
        return full[end:]
    if end is None:
        # Nothing follows the held-out span: keep only the head.
        return full[:start]
    return full[:start] + full[end:]
38
def holdout_partition(input, partitions):
    """
    Partitions the input sequence into the given number of partitions
    and returns a list of subsets of the input, each with one of the
    partitions excluded. Useful for doing held-out data evaluations.

    Every partition has C{len(input) // partitions} items, except the
    last, which also takes any remainder.

    NOTE(review): the C{def} line was missing from the reviewed text.
    The parameter order is reconstructed from the docstring; confirm it.

    @param input: sequence to partition. It must support C{len()} and
        slicing.
    @param partitions: number of partitions to split the input into.
    @return: list of C{partitions} sequences; the i-th is the input with
        the i-th partition removed.

    """
    # Floor division keeps the slice bounds integral. Plain "/" yields a
    # float on Python 3, which cannot be used as a slice index.
    partition_size = len(input) // partitions
    heldout_sets = [
        hold_out(input, partition_size * index, partition_size * (index + 1))
        for index in range(partitions - 1)
    ]
    # The last partition is open-ended so it absorbs the leftover items.
    heldout_sets.append(hold_out(input, partition_size * (partitions - 1), None))
    return heldout_sets
52
# NOTE(review): the def line for this function was missing from the
# reviewed text. The name "partition" and the parameter order are
# reconstructed; confirm them against the callers before merging.
def partition(input, partitions):
    """
    The complement of L{holdout_partition}: simply splits the input
    n ways and returns the pieces.

    Every part has C{len(input) // partitions} items, except the last,
    which also takes any remainder.

    @param input: sequence to split. It must support C{len()} and
        slicing.
    @param partitions: number of parts to split the input into.
    @return: list of C{partitions} slices of the input, in order.

    """
    # Floor division keeps the slice bounds integral. Plain "/" yields a
    # float on Python 3, which cannot be used as a slice index.
    partition_size = len(input) // partitions
    parts = [
        input[partition_size * index: partition_size * (index + 1)]
        for index in range(partitions - 1)
    ]
    # The last part is open-ended so it absorbs the leftover items.
    parts.append(input[partition_size * (partitions - 1):])
    return parts
65