1 """String processing utilities.
2
3 """
4 """
5 ============================== License ========================================
6 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding
7
8 This file is part of The Jazz Parser.
9
10 The Jazz Parser is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 The Jazz Parser is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>.
22
23 ============================ End license ======================================
24
25 """
26 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>"
27
def strs(list, sep=", "):
    """
    Join the string forms of the items of a list into a single string.

    Each item is formatted with ``%s`` and the results are joined with
    ``sep``. An empty input gives an empty string.

    ``list`` is any iterable of items (the name shadows the builtin, but
    is kept so that existing keyword callers keep working). ``sep`` is the
    separator placed between items.

    Returns the joined string.

    """
    # Wrap every item in a 1-tuple: a bare tuple on the right of % would be
    # unpacked as the format arguments, so (1, 2) would raise TypeError and
    # (3,) would silently print as "3".
    return sep.join(["%s" % (item,) for item in list])
37
"""
Format a number as a string in scientific notation, in a style suitable
for displaying probabilities.

``prob`` is converted to a ``Decimal`` through its ``str()`` form,
rounded at its ``prec``-th significant digit and rendered as
``[-]d.ddde<exponent>``. For example, 0.0012346 with a ``prec`` of 4
gives ``1.235e-3``.

This is not a particularly quick procedure. If you need to format
lots of probabilities, it's probably best to do something cruder.

"""
from decimal import Decimal

# Build the Decimal from the printed form of the value.
prob = Decimal(str(prob))
# The quantum is 1 x 10^(adjusted - prec + 1), i.e. the position of the
# prec-th significant digit; quantizing rounds the value at that position.
quant = Decimal((0, [1,], prob.adjusted()-prec+1))
prob = prob.quantize(quant)

# Assemble: optional sign, leading digit, remaining digits after the
# point, then the decimal exponent of the leading digit.
tup = prob.as_tuple()
sci_str = "%s%d.%se%d" % ("-" if prob.is_signed() else "", tup.digits[0], "".join(["%d" % dig for dig in tup.digits[1:]]), prob.adjusted())


return sci_str
57
"""
Handy utility for concise, readable output of a list of names that
includes many that differ only by a numerical suffix.
For example, with braces as the delimiters, ['model0','model1','model2']
is better represented as 'model{0-2}'.

Given a list of items, return a potentially smaller list, with all
names differing only by a numerical suffix condensed into a single
item. The suffixes are wrapped in ``open_brace``/``close_brace`` and
written as ranges where the numbers are consecutive, otherwise as
comma-separated lists.

Names that do not end in a digit are returned unchanged and come first
in the result, in their input order; the condensed names follow in the
iteration order of an internal dictionary. A name that occurs with only
one number is returned without delimiters. Suffixes are converted with
``int()``, so leading zeros are not preserved in the output.

"""
import re
name_nums = {}
outlist = []

# Lazy name part followed by the whole trailing run of digits.
num_end = re.compile('^(?P<name>.*?)(?P<number>\d+)$')

for full_name in inlist:
    found = num_end.match(full_name)
    if found is not None:
        vals = found.groupdict()
        # Collect every number seen for this name stem.
        name_nums.setdefault(vals['name'], []).append(int(vals['number']))
    else:
        # No numerical suffix: pass the name through untouched.
        outlist.append(full_name)

for name,nums in name_nums.items():
    if len(nums) == 1:
        # Only one suffix for this stem: rebuild the plain name.
        outlist.append("%s%s" % (name, nums[0]))
    else:
        # Walk the sorted numbers, growing the current range while they
        # stay consecutive.
        nums.sort()
        ranges = []
        range_start = range_end = nums[0]

        # The trailing None is a sentinel that flushes the final range.
        for num in nums[1:]+[None]:


            if num is not None and num == range_end+1:
                # Still consecutive: extend the current range.
                range_end = num
            else:
                # The range is broken (or input is exhausted): emit it.
                if range_start == range_end:
                    # A range of one number is written as that number.
                    ranges.append("%s" % range_start)
                else:
                    # Otherwise write "first-last".
                    ranges.append("%s-%s" % (range_start, range_end))
                # Start a new range at the current number.
                range_start = range_end = num

        outlist.append("%s%s%s%s" % (name, open_brace, ",".join(ranges), close_brace))
return outlist
115
"""
Distinguishes repeated strings in a list of strings by appending an
integer.

The first occurrence of each string is kept unchanged. Every later
occurrence becomes the string followed by ``separator`` and a counter
that starts at 0 and increases by one per repeat of that string. The
result has the same length and order as ``strings``.

"""
# NOTE(review): generated names are not checked against the other
# strings, so the result is not guaranteed to be duplicate-free. E.g.
# with an empty separator, ['a', 'a', 'a0'] yields 'a0' twice.
# Maps each string seen so far to the next counter value to append.
seen = {}
unique = []
for string in strings:
    if string in seen:
        unique.append("%s%s%d" % (string, separator, seen[string]))
        seen[string] += 1
    else:
        unique.append(string)
        seen[string] = 0
return unique
132
"""
Given a unicode string, which may contain accented characters,
returns a string with no accented characters.

The string is decomposed with NFD normalization and every character
whose Unicode category is 'Mn' is then dropped.

"""
import unicodedata
# NOTE(review): unicode() is the Python 2 builtin; on Python 3 this line
# raises NameError unless a ``unicode`` name is supplied elsewhere.
return ''.join((c for c in unicodedata.normalize('NFD', unicode(string)) \
if unicodedata.category(c) != 'Mn'))
142
143
# Lower-case spellings that are interpreted as a true value. Input is
# stripped and lower-cased before being looked up in this list.
TRUE_STRINGS = ["true", "t", "1", "yes", "on", "hellyeah"]
# Interprets ``string`` as a boolean: returns True when the stripped,
# lower-cased string is one of TRUE_STRINGS and False otherwise.
# NOTE(review): because of the trailing % formatting, the string below is
# an ordinary expression that is evaluated and discarded on every call; it
# is not a docstring, so it does not set __doc__.
"""
Interprets the string as a boolean. Normal Python behaviour for converting
a str to a bool is to return False for the empty string and True for
everything else. This function interprets a load of sensible true values
as True and everything else as False.

Strings considered true (case insensitive): %s.

""" % ", ".join(TRUE_STRINGS)
return string.strip().lower() in TRUE_STRINGS
156
"""
Reduces a string to a simple identifier-like form.

Removes every character that is not a word character, whitespace or a
hyphen, strips leading and trailing whitespace, converts to lowercase,
and replaces each run of whitespace with a single underscore.

Based on Django's slugify function (per the original author's note);
this version joins words with underscores.

"""
import re
# Drop everything except word characters, whitespace and hyphens.
value = re.sub('[^\w\s-]', '', value).strip().lower()
# Collapse each run of whitespace into one underscore.
value = re.sub('[\s]+', '_', value)
return value
169