jazzparser.taggers.pretagged.tagger

1 """A more or less null tagger that assigns just a fixed sequence of tags. 2 3 This tagger should not be used in practice. It is for use in parsing 4 annotated sequences to verify that the annotated derivation structure 5 is possible and to produce the chart that that derivation structure 6 would produce. 7 8 It is instantiated with a sequence of tags and, irrespective of the 9 input (which can be None), returns only those tags. If the input is 10 not None it should be a list of the same length as the tag list. You may 11 want to include input chords so that the tree produced has a record 12 of the chords at the leaves. 13 14 """ 15 """ 16 ============================== License ======================================== 17 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 18 19 This file is part of The Jazz Parser. 20 21 The Jazz Parser is free software: you can redistribute it and/or modify 22 it under the terms of the GNU General Public License as published by 23 the Free Software Foundation, either version 3 of the License, or 24 (at your option) any later version. 25 26 The Jazz Parser is distributed in the hope that it will be useful, 27 but WITHOUT ANY WARRANTY; without even the implied warranty of 28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 29 GNU General Public License for more details. 30 31 You should have received a copy of the GNU General Public License 32 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>. 33 34 ============================ End license ====================================== 35 36 """ 37 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>" 38 39 from jazzparser import settings 40 from jazzparser.utils.input import assign_durations, strip_input 41 from jazzparser.taggers import Tagger 42 from jazzparser.data import Fraction 43 from jazzparser.data.input import DbInput 44 from jazzparser.utils.options import ModuleOption, file_option

class PretaggedTagger(Tagger):
    """
    A tagger that ignores the content of its input and just returns a
    fixed sequence of tags.

    It must be instantiated with a complete set of tags, given either
    directly through the C{tags} keyword argument or through the
    C{tags} tagger option, which names a file to read them from.
    The tag list should contain a list of tags for each word (usually
    each list will contain just one item). These are what it will
    return. A list of plain string labels is also accepted: each label
    is resolved to a sign using the grammar.

    """
    COMPATIBLE_FORMALISMS = [
        'music_roman',
        'music_keyspan',
        'music_halfspan',
    ]
    TAGGER_OPTIONS = [
        ModuleOption('tags', filter=file_option,
            help_text="File to get tag sequence from",
            usage="tags=X, where X a filename"),
    ]
    INPUT_TYPES = ['db', 'chords']

    def __init__(self, grammar, input, options=None, tags=None):
        """
        @param grammar: grammar used to resolve string labels to signs
            (via C{get_sign_for_word_by_tag}).
        @param input: the input sequence, or None. If None, a
            placeholder input of "--" words, one per tag, is used.
        @param options: tagger options; may contain 'tags', the name of
            a file to read the tag sequence from. Defaults to no options.
        @param tags: the tag sequence to return. Takes precedence over
            the 'tags' option.
        @raise ValueError: if no tags are available, if the input and
            the tags differ in length, or if a label cannot be resolved
            to a sign.

        """
        # Avoid sharing one mutable default dictionary between instances
        if options is None:
            options = {}

        if input is None:
            if tags is None:
                # Without either there is nothing to size the placeholder
                # input from (this used to die in len(None))
                raise ValueError("PretaggedTagger was given no input, so it "
                    "needs the keyword argument 'tags' to tell it how "
                    "many words to tag.")
            self.input = ["--"] * len(tags)
        else:
            self.input = input

        # NOTE(review): self.input is deliberately set before the base
        # class initializer runs, as in the original code; the base class
        # is not visible from here, so the order is preserved.
        super(PretaggedTagger, self).__init__(grammar, input, options)

        if 'tags' in self.options and tags is None:
            # Load the tag sequence from a file
            self.tags = get_tags_for_input(input, self.options['tags'])
        else:
            if tags is None:
                raise ValueError("PretaggedTagger must be supplied with a "
                    "keyword argument 'tags' or a tagger option 'tags' "
                    "to tell it what tags to return.")
            if len(self.input) != len(tags):
                raise ValueError("the input given to the PretaggedTagger "
                    "was not the same length as the list of tags.")
            self.tags = tags

        if len(self.tags) and isinstance(self.tags[0], str):
            # These must be schema labels, rather than signs
            # Resolve each one to a sign
            if input is None:
                # No real words: pair the labels with the placeholders
                words = self.input
            else:
                words = input
            signs = []
            for label, word in zip(self.tags, words):
                # Retrieve a sign from the grammar for this tag on this word
                sign = grammar.get_sign_for_word_by_tag(word, label)
                if sign is None:
                    raise ValueError("could not get a sign for the tag '%s' "
                        "on the word '%s'" % (label, word))
                # One-item list: each word gets a list of possible signs
                signs.append([sign])
            self.tags = signs

    def get_signs_for_word(self, index, offset=0):
        """
        Returns the fixed signs for the word at C{index}.

        @param index: position of the word in the tag sequence.
        @param offset: only an offset of 0 (or less) yields any signs;
            a positive offset returns an empty list.
        @return: list of C{(sign, sign.tag, weight)} tuples, where the
            weight is C{Fraction(1, n)} for the n signs of the word, so
            all of the word's signs are weighted equally.

        """
        if offset > 0:
            return []
        all_signs = self.tags[index]
        return [(sign, sign.tag, Fraction(1, len(all_signs)))
                    for sign in all_signs]

    def get_word(self, index):
        """
        Returns the input word at C{index} (a "--" placeholder if the
        tagger was created without an input).

        """
        return self.input[index]

110

class TagsFile(object):
    """
    A file format for storing a list of tags, potentially multiple tag lists.

    A file holds either a single tag sequence (whitespace-separated tags
    on one line) or several sequences keyed by an integer id, one per
    line, in the form C{id:<int> <tag> <tag> ...}. The two forms cannot
    be mixed in one file.

    """
    def __init__(self, tags, filename="no-file"):
        """
        @param tags: a single list of tags, or a dictionary mapping
            integer sequence ids to lists of tags.
        @param filename: name of the file the tags came from; only used
            in error messages.

        """
        self.filename = filename
        # Any mapping type (e.g. an OrderedDict) means "stored by id"
        self.tags_by_id = isinstance(tags, dict)
        self.tags = tags

    @staticmethod
    def from_file(filename):
        """
        Reads a tags file and returns a L{TagsFile} for its contents.

        Blank lines are skipped. An empty file yields an empty
        dictionary of sequences. If the file has several lines without
        ids, the last one is the one that is kept.

        @raise IOError: if the file mixes lines with and without ids.
        @raise ValueError: if the id on an "id:" line is not an integer.

        """
        f = open(filename, 'r')
        try:
            lines = f.readlines()
        finally:
            # Make sure the file gets closed even if reading fails
            f.close()

        id_prefix = "id:"
        # None until the first non-blank line tells us which form we have
        tags_by_id = None
        tags = {}

        # Get a tag sequence from each line
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line.startswith(id_prefix):
                if tags_by_id is False:
                    # We've had a line without an id
                    raise IOError("the tags file %s contains sequences with "
                        "an id and without" % filename)
                # Remove exactly the prefix: lstrip("id:") would also eat
                # any further 'i', 'd' or ':' characters after it
                linetags = line[len(id_prefix):].split()
                # The first element is the id and should be an int
                seq_id = int(linetags.pop(0))
                tags[seq_id] = linetags
                tags_by_id = True
            else:
                if tags_by_id:
                    # We've had a line with an id
                    raise IOError("the tags file %s contains sequences with "
                        "an id and without" % filename)
                tags = line.split()
                tags_by_id = False
        return TagsFile(tags, filename=filename)

    def to_file(self, filename):
        """
        Writes the tags to C{filename} in the format read by
        L{from_file}: one "id:" line per sequence if the tags are stored
        by id, otherwise a single line of tags.

        """
        if self.tags_by_id:
            # Stored as dictionary of sequences by id
            lines = ["id:%d %s" % (seq_id, " ".join(seq))
                        for seq_id, seq in self.tags.items()]
        else:
            # Stored as a single list of tags
            lines = [" ".join(self.tags)]

        f = open(filename, 'w')
        try:
            f.write("\n".join(lines))
        finally:
            f.close()

    def get_tags_for_input(self, input):
        """
        Returns the tag sequence that applies to the given input.

        If the tags are stored as a single list, that list is returned
        whatever the input is. If they are stored by id, the input must
        be a C{DbInput} with a non-None C{id}, and the sequence for that
        id is returned.

        @raise ValueError: if the tags are stored by id and the input
            has no id, or there is no sequence for its id.

        """
        if not self.tags_by_id:
            return self.tags

        has_id = type(input) == DbInput and \
                    hasattr(input, "id") and \
                    input.id is not None
        if not has_id:
            # The tags are stored by id in the file, but the input has no id
            raise ValueError("tags file %s stores tag sequences by id, but "
                "the input %s has no id associated with it"
                % (self.filename, input))
        # Use the tags for this sequence id
        if input.id not in self.tags:
            raise ValueError("tags file %s has no tags for input id %d"
                % (self.filename, input.id))
        return self.tags[input.id]

189

def get_tags_for_input(input, tag_filename):
    """
    Shortcut for reading the tags file C{tag_filename} and looking up
    the tag sequence that goes with C{input} (typically a DbInput).

    @return: whatever C{TagsFile.get_tags_for_input} returns for the
        input.

    """
    return TagsFile.from_file(tag_filename).get_tags_for_input(input)

198

Source Code for Module jazzparser.taggers.pretagged.tagger