1 """Elaborate ruse to allow NLTK's probability models to be stored to disk.
2
3 NLTK's classes can't all be pickled and it doesn't provide any other
4 way of storing things like probability distributions. This module
5 provides procedures to produce a picklable representation of various
6 NLTK classes.
7
8 """
9 """
10 ============================== License ========================================
11 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding
12
13 This file is part of The Jazz Parser.
14
15 The Jazz Parser is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 The Jazz Parser is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>.
27
28 ============================ End license ======================================
29
30 """
31 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"
32
33 from nltk.probability import MLEProbDist, FreqDist, ConditionalFreqDist, \
34 ConditionalProbDist, LaplaceProbDist, WittenBellProbDist, \
35 GoodTuringProbDist, DictionaryProbDist, \
36 DictionaryConditionalProbDist, MutableProbDist
37 import cPickle as pickle
def is_picklable(obj):
    """
    Returns True if the given object can be successfully pickled,
    False otherwise. This is just a neat way of catching a pickling
    error and usually you'll be better off trying to pickle and
    catching the exception.

    The object is serialized in memory with pickle.dumps purely as a
    trial run; the resulting data is thrown away.

    """
    try:
        # Trial serialization: only success or failure matters here
        pickle.dumps(obj)
    except (pickle.PicklingError, TypeError, AttributeError):
        # Unpicklable objects do not always raise PicklingError: many
        # built-in types (generators, locks, file handles) raise TypeError
        # and failed attribute lookups raise AttributeError. All of these
        # mean "not picklable", so report False rather than propagating.
        return False
    return True
54
56 """
57 Interface for various storers that take certain types of objects
58 and produce a dictionary with the essential data needed to recreate
59 them. The dict's values should all be picklable.
60
61 The purpose of this is to define a storable form of NLTK's things
62 that don't have any storable representation.
63
64 """
65 STORED_CLASS = None
66
67 @classmethod
77
78 @classmethod
81
82 @staticmethod
84 raise NotImplementedError, "this storer should implement a dict_from_object method"
85
86 @staticmethod
88 raise NotImplementedError, "this storer should implement a dict_from_object method"
89
92
97 STORED_CLASS = FreqDist
98
99 @staticmethod
101 data = {}
102 data['counts'] = dict(obj)
103 return data
104
105 @staticmethod
107
108 if start_dist is None:
109 dist = FreqDist()
110 else:
111 dist = start_dist
112
113
114 for key,val in dic['counts'].items():
115 dist[key] = val
116 return dist
117
119 STORED_CLASS = ConditionalProbDist
120
121 @staticmethod
123 data = {}
124
125
126 if not is_picklable(obj._probdist_factory):
127 raise ObjectStorerError, "The probdist factory on the "\
128 "ConditionalProbDist is not picklable: %s" % type(obj._probdist_factory).__name__
129 if not is_picklable(obj._factory_args) or not is_picklable(obj._factory_kw_args):
130 raise ObjectStorerError, "Something in the probdist "\
131 "factory's args on the ConditionalProbDist is not "\
132 "picklable. They are: %s and %s" % (obj._factory_args, obj._factory_kw_args)
133 data['probdist_factory'] = obj._probdist_factory
134 data['cfdist'] = object_to_dict(obj._cfdist)
135 data['factory_args'] = obj._factory_args
136 data['factory_kw_args'] = obj._factory_kw_args
137 return data
138
139 @staticmethod
146
160
176
192
208
227
240
249
265
269 """
270 Returns an ObjectStorer subclass that stores the given type if one
271 is found. Raises an ObjectStorerError otherwise.
272
273 """
274 from . import STORERS
275 for storer in STORERS:
276 if storer.STORED_CLASS is cls:
277 return storer
278 raise ObjectStorerError, "could not get an object storer for type %s" % cls.__name__
279
282
285