Package jazzparser :: Package utils :: Package nltk :: Module storage
[hide private]
[frames] | no frames]

Source Code for Module jazzparser.utils.nltk.storage

  1  """Elaborate ruse to allow NLTK's probability models to be stored to disk. 
  2   
  3  NLTK's classes can't all be pickled and it doesn't provide any other  
  4  way of storing things like probability distributions. This module  
  5  provides procedures to produce a picklable representation of various  
  6  NLTK classes. 
  7   
  8  """ 
  9  """ 
 10  ============================== License ======================================== 
 11   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
 12    
 13   This file is part of The Jazz Parser. 
 14    
 15   The Jazz Parser is free software: you can redistribute it and/or modify 
 16   it under the terms of the GNU General Public License as published by 
 17   the Free Software Foundation, either version 3 of the License, or 
 18   (at your option) any later version. 
 19    
 20   The Jazz Parser is distributed in the hope that it will be useful, 
 21   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 23   GNU General Public License for more details. 
 24    
 25   You should have received a copy of the GNU General Public License 
 26   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
 27   
 28  ============================ End license ====================================== 
 29   
 30  """ 
 31  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"  
 32   
 33  from nltk.probability import MLEProbDist, FreqDist, ConditionalFreqDist, \ 
 34                      ConditionalProbDist, LaplaceProbDist, WittenBellProbDist, \ 
 35                      GoodTuringProbDist, DictionaryProbDist, \ 
 36                      DictionaryConditionalProbDist, MutableProbDist 
 37  import cPickle as pickle 
def is_picklable(obj):
    """
    Returns True if the given object can be successfully pickled, 
    False otherwise. This is just a neat way of catching a pickling 
    error and usually you'll be better off trying to pickle and 
    catching the exception.
    
    The pickled data itself is thrown away: only whether the attempt 
    succeeded is reported.
    
    """
    try:
        # Try pickling
        pickle.dumps(obj)
    except (pickle.PicklingError, TypeError, AttributeError):
        # Pickling failed!
        # PicklingError is not the only way a failure shows up: some 
        # unpicklable objects (generators, for instance) make the 
        # pickler raise a TypeError, and a failed lookup of the object's 
        # class or function can surface as an AttributeError
        return False
    return True
54
class ObjectStorer(object):
    """
    Interface for various storers that take certain types of objects 
    and produce a dictionary with the essential data needed to recreate 
    them. The dict's values should all be picklable.
    
    The purpose of this is to define a storable form of NLTK's things 
    that don't have any storable representation.
    
    Subclasses set C{STORED_CLASS} to the type they handle and override 
    the static methods C{_object_to_dict} and C{_dict_to_object}.
    
    """
    # The type of object this storer knows how to store
    STORED_CLASS = None
    
    @classmethod
    def object_to_dict(cls, obj):
        """
        Produces the storable dictionary for obj, using the subclass' 
        C{_object_to_dict}, and records the stored type in the 
        dictionary under the key '_type'.
        
        Raises an ObjectStorerError if obj is not an instance of 
        C{STORED_CLASS}.
        
        """
        if not isinstance(obj, cls.STORED_CLASS):
            raise ObjectStorerError("%s can only "
                "store objects of type %s, not %s" %
                (cls.__name__, cls.STORED_CLASS.__name__, type(obj).__name__))
        dic = cls._object_to_dict(obj)
        # Keep a record of the type of this
        dic['_type'] = cls.STORED_CLASS
        return dic
    
    @classmethod
    def dict_to_object(cls, dic):
        """
        Rebuilds an object from a dictionary produced by 
        C{object_to_dict}, using the subclass' C{_dict_to_object}.
        
        """
        return cls._dict_to_object(dic)
    
    @staticmethod
    def _object_to_dict(obj):
        """
        Subclass hook: return a dictionary of the data needed to 
        recreate obj.
        
        """
        raise NotImplementedError("this storer should implement an "
                                  "_object_to_dict method")
    
    @staticmethod
    def _dict_to_object(dic):
        """
        Subclass hook: return the object described by the dictionary dic.
        
        """
        raise NotImplementedError("this storer should implement a "
                                  "_dict_to_object method")
89
class ObjectStorerError(Exception):
    """
    Error raised by the storers and storage utilities in this module 
    when an object cannot be converted to its storable form.
    
    """
92
################## Basic storers for probability things ################

class FreqDistStorer(ObjectStorer):
    """
    Storer for FreqDist. The stored form holds the distribution's 
    contents as a plain dict under the key 'counts'.
    
    """
    STORED_CLASS = FreqDist
    
    @staticmethod
    def _object_to_dict(obj):
        # The FreqDist is copied into an ordinary dict: that is the 
        # only data that gets stored
        return {'counts': dict(obj)}
    
    @staticmethod
    def _dict_to_object(dic, start_dist=None):
        """
        Rebuilds the distribution from the stored counts.
        
        If start_dist is given, the counts are put into that object 
        instead of a new FreqDist. This is so that storers for 
        overriding classes can call this.
        
        """
        target = FreqDist() if start_dist is None else start_dist
        # Set the counts one at a time, through the distribution's own 
        # item assignment, so that the FreqDist gets properly built
        for sample, count in dic['counts'].items():
            target[sample] = count
        return target
117
class ConditionalProbDistStorer(ObjectStorer):
    """
    Storer for ConditionalProbDist. The stored form holds the probdist 
    factory and its arguments as they are (so they must be picklable) 
    and the storable form of the underlying conditional frequency 
    distribution.
    
    """
    STORED_CLASS = ConditionalProbDist
    
    @staticmethod
    def _object_to_dict(obj):
        """
        Raises an ObjectStorerError if the probdist factory, or any of 
        the arguments kept for it, cannot be pickled.
        
        """
        factory = obj._probdist_factory
        factory_args = obj._factory_args
        factory_kw_args = obj._factory_kw_args
        # We don't know what type this is
        # We hope it's picklable
        if not is_picklable(factory):
            raise ObjectStorerError("The probdist factory on the "
                "ConditionalProbDist is not picklable: %s" % 
                type(factory).__name__)
        if not is_picklable(factory_args) or not is_picklable(factory_kw_args):
            raise ObjectStorerError("Something in the probdist "
                "factory's args on the ConditionalProbDist is not "
                "picklable. They are: %s and %s" % 
                (factory_args, factory_kw_args))
        return {
            'probdist_factory' : factory,
            # The frequency distribution goes through its own storer
            'cfdist' : object_to_dict(obj._cfdist),
            'factory_args' : factory_args,
            'factory_kw_args' : factory_kw_args,
        }
    
    @staticmethod
    def _dict_to_object(dic):
        # Rebuild the frequency distribution first, then hand it to the 
        # constructor along with the factory and its stored arguments
        cfdist = dict_to_object(dic['cfdist'])
        return ConditionalProbDist(
                    cfdist, 
                    dic['probdist_factory'], 
                    *dic['factory_args'], 
                    **dic['factory_kw_args'])
146
class MLEProbDistStorer(ObjectStorer):
    """
    Storer for MLEProbDist. Only the storable form of the underlying 
    frequency distribution is kept.
    
    """
    STORED_CLASS = MLEProbDist
    
    @staticmethod
    def _object_to_dict(obj):
        stored = {}
        # The frequency distribution is stored by its own storer
        stored['freqdist'] = object_to_dict(obj._freqdist)
        return stored
    
    @staticmethod
    def _dict_to_object(dic):
        # Restore the frequency distribution and build the MLE 
        # distribution from it again
        return MLEProbDist(dict_to_object(dic['freqdist']))
160
class LaplaceProbDistStorer(ObjectStorer):
    """
    Storer for LaplaceProbDist. Keeps the storable form of the 
    underlying frequency distribution and the distribution's bins value.
    
    """
    STORED_CLASS = LaplaceProbDist
    
    @staticmethod
    def _object_to_dict(obj):
        stored = {}
        # The frequency distribution is stored by its own storer
        stored['freqdist'] = object_to_dict(obj._freqdist)
        stored['bins'] = obj._bins
        return stored
    
    @staticmethod
    def _dict_to_object(dic):
        # Restore the frequency distribution, then build the 
        # distribution from it with the stored bins value
        return LaplaceProbDist(dict_to_object(dic['freqdist']), dic['bins'])
176
class WittenBellProbDistStorer(ObjectStorer):
    """
    Storer for WittenBellProbDist. Keeps the storable form of the 
    underlying frequency distribution and a bins value.
    
    """
    STORED_CLASS = WittenBellProbDist
    
    @staticmethod
    def _object_to_dict(obj):
        # The bins value is stored as the sum of the distribution's 
        # _Z and _T attributes
        # NOTE(review): presumably this recovers the bins argument 
        # originally given to the constructor - confirm against NLTK
        total_bins = obj._Z + obj._T
        stored = {}
        stored['freqdist'] = object_to_dict(obj._freqdist)
        stored['bins'] = total_bins
        return stored
    
    @staticmethod
    def _dict_to_object(dic):
        # Restore the frequency distribution, then build the 
        # distribution from it with the stored bins value
        return WittenBellProbDist(dict_to_object(dic['freqdist']), dic['bins'])
192
class GoodTuringProbDistStorer(ObjectStorer):
    """
    Storer for GoodTuringProbDist. Keeps the storable form of the 
    underlying frequency distribution and the distribution's bins value.
    
    """
    STORED_CLASS = GoodTuringProbDist
    
    @staticmethod
    def _object_to_dict(obj):
        stored = {}
        # The frequency distribution is stored by its own storer
        stored['freqdist'] = object_to_dict(obj._freqdist)
        stored['bins'] = obj._bins
        return stored
    
    @staticmethod
    def _dict_to_object(dic):
        # Restore the frequency distribution, then build the 
        # distribution from it with the stored bins value
        return GoodTuringProbDist(dict_to_object(dic['freqdist']), dic['bins'])
208
class ConditionalFreqDistStorer(ObjectStorer):
    """
    Storer for ConditionalFreqDist. Keeps a dictionary, under the key 
    'fdists', mapping each condition to the storable form of that 
    condition's distribution.
    
    """
    STORED_CLASS = ConditionalFreqDist
    
    @staticmethod
    def _object_to_dict(obj):
        stored_fdists = {}
        # Every condition's distribution is stored by its own storer
        for condition, dist in obj._fdists.items():
            stored_fdists[condition] = object_to_dict(dist)
        return {'fdists': stored_fdists}
    
    @staticmethod
    def _dict_to_object(dic):
        restored_fdists = {}
        for condition, stored_dist in dic['fdists'].items():
            restored_fdists[condition] = dict_to_object(stored_dist)
        # Start from an empty distribution and put the restored 
        # per-condition distributions straight into it
        cfdist = ConditionalFreqDist()
        cfdist._fdists = restored_fdists
        return cfdist
227
class DictionaryProbDistStorer(ObjectStorer):
    """
    Storer for DictionaryProbDist. Keeps a dictionary, under the key 
    'dict', mapping each sample to its log probability.
    
    """
    STORED_CLASS = DictionaryProbDist
    
    @staticmethod
    def _object_to_dict(obj):
        logprobs = {}
        # Ask the distribution for the log probability of each of its 
        # samples
        for sample in obj.samples():
            logprobs[sample] = obj.logprob(sample)
        return {'dict': logprobs}
    
    @staticmethod
    def _dict_to_object(dic):
        # The stored values are log probabilities, hence log=True
        logprobs = dic['dict']
        return DictionaryProbDist(prob_dict=logprobs, log=True)
240
class MutableProbDistStorer(DictionaryProbDistStorer):
    """
    Storer for MutableProbDist. The stored form is the one inherited 
    from DictionaryProbDistStorer; only the rebuilding differs.
    
    """
    STORED_CLASS = MutableProbDist
    
    @staticmethod
    def _dict_to_object(dic):
        logprobs = dic['dict']
        # Build a dictionary distribution from the stored log 
        # probabilities first, then wrap it in a mutable distribution 
        # over the same samples
        base_dist = DictionaryProbDist(prob_dict=logprobs, log=True)
        return MutableProbDist(base_dist, samples=logprobs.keys())
249
class DictionaryConditionalProbDistStorer(ObjectStorer):
    """
    Storer for DictionaryConditionalProbDist. Keeps a dictionary, under 
    the key 'dists', mapping each condition to the storable form of 
    that condition's distribution.
    
    """
    STORED_CLASS = DictionaryConditionalProbDist
    
    @staticmethod
    def _object_to_dict(obj):
        stored_dists = {}
        # Each individual distribution needs to be storable too
        for cond in obj.conditions():
            stored_dists[cond] = object_to_dict(obj[cond])
        return {'dists': stored_dists}
    
    @staticmethod
    def _dict_to_object(dic):
        restored_dists = {}
        for cond, stored_dist in dic['dists'].items():
            restored_dists[cond] = dict_to_object(stored_dist)
        return DictionaryConditionalProbDist(restored_dists)
265
############################ Utilities #############################

def get_storer(cls):
    """
    Returns an ObjectStorer subclass that stores the given type if one 
    is found. Raises an ObjectStorerError otherwise.
    
    A storer is only returned if its STORED_CLASS is exactly the given 
    type: the comparison is by identity, so a storer for a superclass 
    of cls is not used.
    
    """
    # NOTE(review): imported at call time, presumably because the 
    # package's STORERS list is built from the classes in this module - 
    # confirm before moving this to the top of the file
    from . import STORERS
    for storer in STORERS:
        if storer.STORED_CLASS is cls:
            return storer
    raise ObjectStorerError("could not get an object storer for type %s" % 
                            cls.__name__)
279
def object_to_dict(obj):
    """
    Produces the storable dictionary for obj, using the storer that 
    get_storer() returns for obj's exact type.
    
    """
    storer = get_storer(type(obj))
    return storer.object_to_dict(obj)
282
def dict_to_object(dic):
    """
    Rebuilds an object from a storable dictionary, using the storer 
    that get_storer() returns for the type recorded under the 
    dictionary's '_type' key.
    
    """
    storer = get_storer(dic['_type'])
    return storer.dict_to_object(dic)
285