Package jazzparser :: Package taggers :: Package pretagged :: Module tagger
[hide private]
[frames | no frames]

Source Code for Module jazzparser.taggers.pretagged.tagger

  1  """A more or less null tagger that assigns just a fixed sequence of tags. 
  2   
  3  This tagger should not be used in practice. It is for use in parsing  
  4  annotated sequences to verify that the annotated derivation structure  
  5  is possible and to produce the chart that that derivation structure  
  6  would produce. 
  7   
  8  It is instantiated with a sequence of tags and, irrespective of the  
  9  input (which can be None), returns only those tags. If the input is  
 10  not None it should be a list of the same length as the tag list. You may  
 11  want to include input chords so that the tree produced has a record  
 12  of the chords at the leaves. 
 13   
 14  """ 
 15  """ 
 16  ============================== License ======================================== 
 17   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
 18    
 19   This file is part of The Jazz Parser. 
 20    
 21   The Jazz Parser is free software: you can redistribute it and/or modify 
 22   it under the terms of the GNU General Public License as published by 
 23   the Free Software Foundation, either version 3 of the License, or 
 24   (at your option) any later version. 
 25    
 26   The Jazz Parser is distributed in the hope that it will be useful, 
 27   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 28   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 29   GNU General Public License for more details. 
 30    
 31   You should have received a copy of the GNU General Public License 
 32   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
 33   
 34  ============================ End license ====================================== 
 35   
 36  """ 
 37  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"  
 38   
 39  from jazzparser import settings 
 40  from jazzparser.utils.input import assign_durations, strip_input 
 41  from jazzparser.taggers import Tagger 
 42  from jazzparser.data import Fraction 
 43  from jazzparser.data.input import DbInput 
 44  from jazzparser.utils.options import ModuleOption, file_option 
class PretaggedTagger(Tagger):
    """
    Null tagger that hands back a fixed, pre-supplied sequence of tags.

    The input doesn't matter. Must be instantiated with a complete
    set of tags, given either directly through the C{tags} keyword
    argument or as the name of a tags file in the C{tags} tagger option.
    The tag list should contain lists of tags for each word (usually
    each list will contain just one item). These are what it will return.

    If the first entry of the tag list is a plain string, the entries
    are treated as tag labels and each one is resolved to a sign through
    the grammar.

    """
    COMPATIBLE_FORMALISMS = [
        'music_roman',
        'music_keyspan',
        'music_halfspan',
    ]
    TAGGER_OPTIONS = [
        ModuleOption('tags', filter=file_option,
            help_text="File to get tag sequence from",
            usage="tags=X, where X a filename"),
    ]
    INPUT_TYPES = ['db', 'chords']

    def __init__(self, grammar, input, options=None, tags=None):
        """
        Set up the tagger with its fixed tag sequence.

        @param grammar: grammar used to resolve string tag labels to signs
            (via C{get_sign_for_word_by_tag}).
        @param input: the input sequence, or None. When None, a placeholder
            word "--" is used for every tag, so C{tags} must then be given
            directly (the placeholder list is sized from it).
        @param options: tagger options, passed on to the superclass
            constructor. Defaults to a new empty dict.
        @param tags: the tag sequence to return. If omitted, the sequence
            is loaded from the file named by the C{tags} tagger option.
        @raise ValueError: if no tags are available from either source, if
            the input and the directly supplied tags differ in length, or
            if the grammar has no sign for a label on a word.

        """
        if options is None:
            # A fresh dict per call: a shared mutable default would be
            # the same object for every instance created without options
            options = {}

        if input is None:
            self.input = ["--"] * len(tags)
        else:
            self.input = input

        # NOTE(review): self.options is read below but never assigned in
        # this class -- presumably the superclass constructor sets it
        super(PretaggedTagger, self).__init__(grammar, input, options)

        if 'tags' in self.options and tags is None:
            # Load the tag sequence from a file
            self.tags = get_tags_for_input(input, self.options['tags'])
        else:
            if tags is None:
                raise ValueError("PretaggedTagger must be supplied with a "
                    "keyword argument 'tags' or a tagger option 'tags' "
                    "to tell it what tags to return.")
            if len(self.input) != len(tags):
                raise ValueError("the input given to the PretaggedTagger "
                    "was not the same length as the list of tags.")
            self.tags = tags

        if self.tags and isinstance(self.tags[0], str):
            # These must be schema labels, rather than signs
            # Resolve each one to a sign
            if input is None:
                # No real input: pair the labels with the placeholder
                # words instead of trying to iterate over None
                words = self.input
            else:
                words = input
            signs = []
            for label, word in zip(list(self.tags), words):
                # Retrieve a sign from the grammar for this tag on this word
                sign = grammar.get_sign_for_word_by_tag(word, label)
                if sign is None:
                    raise ValueError("could not get a sign for the tag '%s' "
                        "on the word '%s'" % (label, word))
                signs.append([sign])
            self.tags = signs

    def get_signs_for_word(self, index, offset=0):
        """
        Return the stored signs for the word at C{index}.

        Every sign stored for the word is returned in a single batch:
        any C{offset} greater than 0 yields an empty list.

        @return: list of C{(sign, sign.tag, weight)} tuples, where the
            weight is C{Fraction(1, n)} for the n signs stored at that
            index.

        """
        if offset > 0:
            return []
        all_signs = self.tags[index]
        return [(sign, sign.tag, Fraction(1, len(all_signs)))
                    for sign in all_signs]

    def get_word(self, index):
        """Return the input item stored at position C{index}."""
        return self.input[index]
110
class TagsFile(object):
    """
    A file format for storing a list of tags, potentially multiple tag lists.

    A file holds either one plain sequence of whitespace-separated tags,
    or one sequence per line, each line starting with C{id:} followed by
    an integer id and then the tags. The two forms cannot be mixed in
    one file.

    """
    def __init__(self, tags, filename="no-file"):
        """
        @param tags: either a dict mapping integer ids to tag lists, or a
            single list of tags.
        @param filename: name reported in error messages.

        """
        self.filename = filename
        # A dict means several sequences keyed by id; anything else is
        # treated as one plain tag sequence
        self.tags_by_id = isinstance(tags, dict)
        self.tags = tags

    @staticmethod
    def from_file(filename):
        """
        Read a tags file and build a L{TagsFile} from it.

        Blank lines are skipped. If the file contains several lines
        without an id, only the last one is kept. A file with no tag
        lines at all yields an empty by-id mapping.

        @raise IOError: if the file mixes lines with and without an id
            (or if the file cannot be read).
        @raise ValueError: if the id on an C{id:} line is not an integer.

        """
        tag_file = open(filename, 'r')
        try:
            lines = tag_file.readlines()
        finally:
            # Close the handle even if reading fails
            tag_file.close()

        prefix = "id:"
        # None until the first tag line tells us which form the file uses
        tags_by_id = None
        tags = {}

        # Get a tag sequence from each line
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line.startswith(prefix):
                if tags_by_id is False:
                    # We've had a line without an id
                    raise IOError("the tags file %s contains sequences with "
                        "an id and without" % filename)
                # Slice the prefix off: lstrip() would strip a set of
                # characters rather than this exact prefix
                linetags = line[len(prefix):].split()
                # The first element is the id and should be an int
                seq_id = int(linetags.pop(0))
                tags[seq_id] = linetags
                tags_by_id = True
            else:
                if tags_by_id is True:
                    # We've had a line with an id
                    raise IOError("the tags file %s contains sequences with "
                        "an id and without" % filename)
                tags = line.split()
                tags_by_id = False
        return TagsFile(tags, filename=filename)

    def to_file(self, filename):
        """
        Write the tags to C{filename} in the format read by L{from_file}.

        Sequences stored by id are written one per line in ascending id
        order, so the output is the same on every run.

        """
        lines = []
        if self.tags_by_id:
            # Stored as dictionary of sequences by id
            for seq_id, seq_tags in sorted(self.tags.items()):
                lines.append("id:%d %s" % (seq_id, " ".join(seq_tags)))
        else:
            # Stored as a single list of tags
            lines.append(" ".join(self.tags))

        out_file = open(filename, 'w')
        try:
            out_file.write("\n".join(lines))
        finally:
            out_file.close()

    def get_tags_for_input(self, input):
        """
        Return the tag sequence that applies to C{input}.

        When the tags are a single plain sequence, that sequence is
        returned whatever the input is. When they are stored by id, the
        input must be a C{DbInput} with a non-None C{id} attribute, and
        the sequence stored under that id is returned.

        @raise ValueError: if the tags are stored by id and the input has
            no id, or no sequence is stored for the input's id.

        """
        if not self.tags_by_id:
            return self.tags

        has_id = isinstance(input, DbInput) and \
                    getattr(input, "id", None) is not None
        if not has_id:
            # The tags are stored by id in the file, but the input has no id
            raise ValueError("tags file %s stores tag sequences by id, but "
                "the input %s has no id associated with it"
                % (self.filename, input))
        # Use the tags for this sequence id
        if input.id not in self.tags:
            raise ValueError("tags file %s has no tags for input id %d"
                % (self.filename, input.id))
        return self.tags[input.id]
189
def get_tags_for_input(input, tag_filename):
    """
    Shortcut for reading a tags file and looking up one input's tags.

    Loads the file named C{tag_filename} as a L{TagsFile} and returns
    whatever its C{get_tags_for_input} method gives for C{input}.

    """
    return TagsFile.from_file(tag_filename).get_tags_for_input(input)
198