Package jazzparser :: Package taggers :: Package candc :: Module utils
[hide private]
[frames | no frames]

Source Code for Module jazzparser.taggers.candc.utils

  1  """Some basic utilities used by the C&C tagger interface. 
  2   
  3  """ 
  4  """ 
  5  ============================== License ======================================== 
  6   Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 
  7    
  8   This file is part of The Jazz Parser. 
  9    
 10   The Jazz Parser is free software: you can redistribute it and/or modify 
 11   it under the terms of the GNU General Public License as published by 
 12   the Free Software Foundation, either version 3 of the License, or 
 13   (at your option) any later version. 
 14    
 15   The Jazz Parser is distributed in the hope that it will be useful, 
 16   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 18   GNU General Public License for more details. 
 19    
 20   You should have received a copy of the GNU General Public License 
 21   along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>. 
 22   
 23  ============================ End license ====================================== 
 24   
 25  """ 
 26  __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"  
 27   
def untag_sequence_data(sequence):
    """
    Strip the tags from a line of C&C-style tagged data.

    The line is split on whitespace and, for every token, only the text
    before its first "|" is kept. A token containing no "|" is kept whole.

    @param sequence: one line read from a tagged data file
    @return: the bare chord sequence, tokens joined by single spaces

    """
    chords = []
    for token in sequence.split():
        # Everything from the first "|" onwards is tag material
        chord, _sep, _tags = token.partition("|")
        chords.append(chord)
    return " ".join(chords)
35
def tags_from_sequence_data(sequence):
    """
    Collect the last "|"-separated field of each token in a line.

    The line is split on whitespace; for every token the text after its
    final "|" is taken. A token containing no "|" is returned whole.

    @param sequence: one line of "|"-delimited tagged data
    @return: list with one entry per token, in order

    """
    tags = []
    for token in sequence.split():
        # Split once from the right: the last piece is the final field
        tags.append(token.rsplit("|", 1)[-1])
    return tags
38
def training_sequence_to_candc(sequence):
    """
    Convert one line of our training data into the form fed directly to
    the C&C training tool.

    Each whitespace-separated observation loses its leading "X|" field
    (in our data format, the chord itself); the rest of the observation
    is kept untouched. An observation with no "|" becomes an empty string.

    @param sequence: one line of training data
    @return: the converted observations joined by single spaces

    """
    observations = []
    for token in sequence.split():
        # Discard the first field and the "|" that follows it
        _chord, _sep, remainder = token.partition("|")
        observations.append(remainder)
    return " ".join(observations)
51
def training_data_to_candc(lines):
    """
    Apply L{training_sequence_to_candc} to every sequence of a data set.

    @param lines: iterable of training data lines (one sequence each)
    @return: list of converted lines, in the same order

    """
    converted = []
    for seq in lines:
        converted.append(training_sequence_to_candc(seq))
    return converted
58 59
def _sequence_to_candc_format(formatter, sequence):
    """
    Render a chord sequence as one line of C&C training observations.

    This is the shared inner function behind the different C&C data
    formats produced in this module; they differ only in the formatter
    they pass in.

    @param formatter: callable that receives the members of one chord
        pair as positional arguments and returns the observation string
    @param sequence: chord sequence object; its C{iterator()} method
        supplies the chords
    @return: the observations joined by single spaces, terminated by a
        newline

    """
    from jazzparser.utils.base import group_pairs

    chords = list(sequence.iterator())
    # A None is appended after the final chord. Presumably group_pairs then
    # pairs the last chord with None (the formatters in this module test
    # for a None second chord) -- confirm against group_pairs.
    chords.append(None)

    # Produce one observation per chord pair
    observations = []
    for pair in group_pairs(chords):
        observations.append(formatter(*pair))
    return " ".join(observations) + "\n"
74
75 -def _type_format(type, mapping):
76 if mapping is None: 77 return type 78 else: 79 return mapping[type]
80
def sequence_to_candc_pos(sequence, type_map=None):
    """
    Generate POS training data for C&C from a chord sequence.

    Every chord yields an observation of the form "<interval>-<type>|C".
    The interval is the difference between the following chord's root and
    this chord's root, modulo 12, and is left empty when there is no
    following chord. The type is the chord's type, passed through
    C{type_map} if one is given.

    @param sequence: chord sequence to convert
    @param type_map: optional mapping applied to each chord type
    @return: one newline-terminated line of observations

    """
    def _pos_observation(chord, next_chord):
        if next_chord is not None:
            interval = "%d" % ((next_chord.root - chord.root) % 12)
        else:
            # No following chord to measure against
            interval = ""
        chord_type = _type_format(chord.type, type_map)
        return "%s-%s|C" % (interval, chord_type)

    return _sequence_to_candc_format(_pos_observation, sequence)

def sequence_to_candc_chord_super(sequence, type_map=None):
    """
    Generate data in our own combined format, which carries the chord
    itself, the observation and the super-tagger training tag.

    Every chord yields "<chord>|<interval>-<type>|C|<category>", where
    <chord> is the chord's C{jazz_parser_input}, the interval is the
    difference between the following chord's root and this chord's root,
    modulo 12 (empty when there is no following chord), the type is the
    chord's type passed through C{type_map} if given, and the category is
    the chord's C{category}, or "?" when that is empty or None.

    @param sequence: chord sequence to convert
    @param type_map: optional mapping applied to each chord type
    @return: one newline-terminated line of observations

    """
    def _combined_observation(chord, next_chord):
        if next_chord is not None:
            interval = "%d" % ((next_chord.root - chord.root) % 12)
        else:
            # No following chord to measure against
            interval = ""
        chord_input = chord.jazz_parser_input
        chord_type = _type_format(chord.type, type_map)
        # Unannotated chords get a placeholder tag
        category = chord.category or "?"
        return "%s|%s-%s|C|%s" % (chord_input, interval, chord_type, category)

    return _sequence_to_candc_format(_combined_observation, sequence)

def sequence_to_candc_super(sequence, type_map=None):
    """
    Generate super-tagger training data for C&C from a chord sequence.

    Every chord yields "<interval>-<type>|C|<category>". The interval is
    the difference between the following chord's root and this chord's
    root, modulo 12 (empty when there is no following chord); the type is
    the chord's type passed through C{type_map} if given; the category is
    the chord's C{category}, or "?" when that is empty or None.

    @param sequence: chord sequence to convert
    @param type_map: optional mapping applied to each chord type
    @return: one newline-terminated line of observations

    """
    def _super_observation(chord, next_chord):
        if next_chord is not None:
            interval = "%d" % ((next_chord.root - chord.root) % 12)
        else:
            # No following chord to measure against
            interval = ""
        chord_type = _type_format(chord.type, type_map)
        # Unannotated chords get a placeholder tag
        category = chord.category or "?"
        return "%s-%s|C|%s" % (interval, chord_type, category)

    return _sequence_to_candc_format(_super_observation, sequence)

def sequence_index_to_candc_chord_super(si, *args, **kwargs):
    """
    Given a SequenceIndex object containing sequence data, produces
    C&C training data as a single string.

    Each sequence in C{si.sequences} is converted with
    L{sequence_to_candc_chord_super} and the results are concatenated.

    @param si: object exposing the sequences as C{si.sequences}
    @param args: extra positional arguments forwarded to
        L{sequence_to_candc_chord_super}
    @param kwargs: extra keyword arguments forwarded to
        L{sequence_to_candc_chord_super} (e.g. C{type_map})
    @return: the concatenated training data

    """
    # The keyword arguments must be expanded under the name they were
    # collected as; the previous "**kwarg" raised NameError on any
    # non-empty index.
    return "".join(
        [sequence_to_candc_chord_super(s, *args, **kwargs)
         for s in si.sequences])
121
def sequence_list_to_candc_chord_super(sequences, *args, **kwarg):
    """
    Produce C&C training data, as a single string, from a list of
    sequences.

    Each sequence is converted with L{sequence_to_candc_chord_super} and
    the results are concatenated in order.

    @param sequences: iterable of chord sequences
    @param args: extra positional arguments forwarded to the converter
    @param kwarg: extra keyword arguments forwarded to the converter
    @return: the concatenated training data

    """
    chunks = []
    for seq in sequences:
        chunks.append(sequence_to_candc_chord_super(seq, *args, **kwarg))
    return "".join(chunks)
129
def sequence_index_to_training_file(si, type_map=None):
    """
    Write all the data of a SequenceIndex object, in our hybrid C&C
    training data format, to a temporary file and return that file,
    still open.

    The data is converted (rather trivially) by the train_model function
    into C&C's required format.

    @param si: SequenceIndex object holding the sequences
    @param type_map: optional chord type mapping passed on to the
        converter
    @return: the open NamedTemporaryFile, flushed after writing

    """
    import tempfile

    training_file = tempfile.NamedTemporaryFile()
    training_data = sequence_index_to_candc_chord_super(si, type_map=type_map)
    training_file.write(training_data)
    # Push the data out to disk; the handle itself is left open
    training_file.flush()
    return training_file
143
def sequence_list_to_training_file(seqs, type_map=None):
    """
    Write a list of sequences, in our hybrid C&C training data format,
    to a temporary file and return that file, still open.

    The data is converted (rather trivially) by the train_model function
    into C&C's required format.

    @param seqs: list of chord sequences
    @param type_map: optional chord type mapping passed on to the
        converter
    @return: the open NamedTemporaryFile, flushed after writing

    """
    import tempfile

    training_file = tempfile.NamedTemporaryFile()
    training_data = sequence_list_to_candc_chord_super(seqs, type_map=type_map)
    training_file.write(training_data)
    # Push the data out to disk; the handle itself is left open
    training_file.flush()
    return training_file
157
def generate_tag_list(filename, grammar=None):
    """
    Generates a list of possible tags to be stored along with a C&C model.
    It contains all tags that are in the grammar, i.e. every key of
    C{grammar.families}, written one per line with no trailing newline.

    @param filename: path of the file to write (overwritten if present)
    @param grammar: grammar whose C{families} keys are the tags; the
        default grammar is loaded when this is None

    """
    if grammar is None:
        # Load the default grammar. The import lives in this branch so
        # that the grammar package is only needed when it is actually used.
        from jazzparser.grammar import get_grammar
        grammar = get_grammar()
    tags = grammar.families.keys()
    data = "\n".join(tags)
    # The context manager closes the file even if the write fails
    with open(filename, 'w') as tag_file:
        tag_file.write(data)
173
def read_tag_list(filename):
    """
    Reads in a tag list generated by L{generate_tag_list}.

    The file is expected to hold one tag per line. Empty lines are
    skipped, so an empty file gives an empty list and a trailing newline
    does not produce a spurious empty tag.

    @param filename: path of the tag list file
    @return: list of tags in file order

    """
    with open(filename, 'r') as f:
        data = f.read()
    # Splitting "" (or text ending in "\n") would otherwise leave an
    # empty string in the result
    return [tag for tag in data.split("\n") if tag]
182