1 """Misc utilities used by the chordclass tagger.
2
3 """
4 """
5 ============================== License ========================================
6 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding
7
8 This file is part of The Jazz Parser.
9
10 The Jazz Parser is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 The Jazz Parser is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>.
22
23 ============================ End license ======================================
24
25 """
26 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"
27
29 """
30 CKY chart for the mini-parsing task we need to do to prepare the categories.
31 """
42
# NOTE(review): the `class`/`def` header lines, the constructor that builds
# `self._chart`, and all indentation are missing from this listing; the three
# statements below are method bodies whose signatures are not shown.
# Presumably they belong to get_cell, add_span and __str__ of the chart class
# used by the span-building code further down -- TODO confirm against the
# full file.
#
# Cell lookup: the entry container for a span lives at row `start`, column
# `end-1`. Callers in this file pass an exclusive `end` (a single timestep is
# the span (time, time+1)), which is presumably why 1 is subtracted.
44 return self._chart[start][end-1]
45
# Append `val` to the container held in the cell addressed the same way.
47 self._chart[start][end-1].append(val)
48
50
# The string form is simply str() of the underlying two-level chart structure.
51 return str(self._chart)
52
54 """
55 Processes the output from decoding the model in the form of a set of
56 top tags for each timestep. We need to find each possible sequence of
57 identical tags and combine them into a single span so that the
58 self-transition gets interpreted as a continuation of the category.
59 We also want to keep the smaller spans, right down to the single-timestep
60 categories, so that they can be considered by the parser in combination
61 with other spans that would otherwise overlap.
62
63 We do this using a little CKY chart parser.
64
65 """
# NOTE(review): the enclosing `def` line and all indentation are missing from
# this listing; the comments below describe the statements as written.
#
# One chart row/column per timestep.
66 T = len(timesteps)
67 chart = CategoryProcessChart(T)
68
# Pair every entry of each timestep with its index in that timestep's list.
# The loops below unpack that index as the entry's "priority" and unpack the
# entry itself as a (prob, tag) pair.
69 timesteps = [list(enumerate(timestep)) for timestep in timesteps]
70
71
# Seed the chart: every (priority, (prob, tag)) entry becomes a span covering
# only its own timestep, i.e. start=time and exclusive end=time+1.
72 for (time,tags) in enumerate(timesteps):
73 for tag in tags:
74 chart.add_span(time, time+1, tag)
75
76
# Build longer spans: for each (start, end) and each split point `middle`,
# join a left entry and a right entry whenever they carry an equal tag.
# NOTE(review): `start` ascends within a fixed `end`, so a right-hand cell
# (middle, end) longer than one timestep has not been written yet when it is
# read here. If cells start out empty (constructor not visible -- confirm),
# only the split middle == end-1 ever contributes, which also means each
# same-tag run is added once rather than once per split point.
77 for end in range(2, T+1):
78 for start in range(0, end-1):
79 for middle in range(start+1, end):
80
81 first = chart.get_cell(start, middle)
82 second = chart.get_cell(middle, end)
83 for priority1,(prob1,tag1) in first:
84 for priority2,(prob2,tag2) in second:
85 if tag1 == tag2:
86
87
88
# The merged span keeps the larger of the two priority values and the sum of
# the two prob values.
# NOTE(review): adding only yields a joint score if these are log-probabilities
# -- confirm with the caller.
89 chart.add_span(start, end, \
90 (max(priority1, priority2), (prob1+prob2, tag1)))
91
92
# Collect every chart entry as (start, end, (prob, tag)), bucketed by its
# priority value.
93 prioritized_spans = {}
94 for start in range(0, T):
95 for end in range(start+1, T+1):
96 for (priority,(prob,tag)) in chart.get_cell(start,end):
97 prioritized_spans.setdefault(priority, []).append((start, end, (prob,tag)))
98
# Order the buckets by ascending priority value and drop the keys, leaving a
# list of span lists.
99 grouped_spans = [spans for (priority,spans) in sorted(prioritized_spans.items())]
100
101 return grouped_spans
102