Package jazzparser :: Package utils :: Package nltk :: Module hmm
[hide private]
[frames | no frames]

Source Code for Module jazzparser.utils.nltk.hmm

 1  """Tools to extend NLTK's implementation of HMMs. 
 2   
 3  """ 
 4  """ 
 5  ============================== License ======================================== 
 6   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
 7    
 8   This file is part of The Jazz Parser. 
 9    
10   The Jazz Parser is free software: you can redistribute it and/or modify 
11   it under the terms of the GNU General Public License as published by 
12   the Free Software Foundation, either version 3 of the License, or 
13   (at your option) any later version. 
14    
15   The Jazz Parser is distributed in the hope that it will be useful, 
16   but WITHOUT ANY WARRANTY; without even the implied warranty of 
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
18   GNU General Public License for more details. 
19    
20   You should have received a copy of the GNU General Public License 
21   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
22   
23  ============================ End license ====================================== 
24   
25  """ 
26  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"  
27   
28   
29  # The NLTK import has already been checked in our __init__.py 
30  from nltk.tag.hmm import HiddenMarkovModelTrainer, HiddenMarkovModelTagger 
31  from jazzparser.utils.nltk.storage import ObjectStorer 
class PicklableHmmTrainer(HiddenMarkovModelTrainer):
    """
    Variant of HiddenMarkovModelTrainer intended to yield HMMs that can
    be pickled, which the stock trainer does not manage. For now this
    only covers models produced by supervised training.

    Treat this as a workaround rather than a robust solution: NLTK HMMs
    have no storage mechanism of their own and the default trainer builds
    unpicklable models. The fix is fragile, because pickling will break
    again as soon as any attribute of the model is set to something
    unpicklable, such as a Python lambda.

    """

    def train_supervised(self, *args, **kwargs):
        """
        Delegate to the parent class's train_supervised, filling in
        C{mle_estimator} as the 'estimator' keyword argument whenever the
        caller gave none (or gave None).

        A caller-supplied 'estimator' is forwarded unchanged. It should
        be a named function defined at module level, not a lambda;
        otherwise the trained HMM cannot be pickled.

        """
        # Imported on every call, regardless of whether the default is
        # actually needed.
        from jazzparser.utils.nltk.probability import mle_estimator

        if kwargs.get('estimator') is None:
            # Covers both a missing key and an explicit estimator=None
            kwargs['estimator'] = mle_estimator

        parent = super(PicklableHmmTrainer, self)
        return parent.train_supervised(*args, **kwargs)
59
class HiddenMarkovModelTaggerStorer(ObjectStorer):
    """
    ObjectStorer for HiddenMarkovModelTagger instances: converts a tagger
    to a plain dictionary and rebuilds a tagger from such a dictionary.

    """
    STORED_CLASS = HiddenMarkovModelTagger

    @staticmethod
    def _object_to_dict(obj):
        """
        Build the dictionary representation of the tagger C{obj}.

        The tagger's states and symbols are stored untouched (the
        original author notes that they contain only strings). The
        priors, outputs and transitions are probability-distribution
        objects and are each converted with C{object_to_dict}.

        """
        from .storage import object_to_dict

        return {
            'states': obj._states,
            'symbols': obj._symbols,
            'priors': object_to_dict(obj._priors),
            'outputs': object_to_dict(obj._outputs),
            'transitions': object_to_dict(obj._transitions),
        }

    @staticmethod
    def _dict_to_object(dic):
        """
        Rebuild a HiddenMarkovModelTagger from a dictionary C{dic} with
        the keys written by L{_object_to_dict}: 'states', 'symbols',
        'priors', 'outputs' and 'transitions'.

        """
        from .storage import dict_to_object

        # Stored verbatim, so no conversion is needed
        states = dic['states']
        symbols = dic['symbols']

        # Stored in converted form: turn each back into an object
        restored = {}
        for key in ('priors', 'outputs', 'transitions'):
            restored[key] = dict_to_object(dic[key])

        return HiddenMarkovModelTagger(
            symbols,
            states,
            restored['transitions'],
            restored['outputs'],
            restored['priors'],
        )
87