jazzparser.utils.strings

1 """String processing utilities. 2 3 """ 4 """ 5 ============================== License ======================================== 6 Copyright (C) 2008, 2010-12 University of Edinburgh, Mark Granroth-Wilding 7 8 This file is part of The Jazz Parser. 9 10 The Jazz Parser is free software: you can redistribute it and/or modify 11 it under the terms of the GNU General Public License as published by 12 the Free Software Foundation, either version 3 of the License, or 13 (at your option) any later version. 14 15 The Jazz Parser is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 19 20 You should have received a copy of the GNU General Public License 21 along with The Jazz Parser. If not, see <http://www.gnu.org/licenses/>. 22 23 ============================ End license ====================================== 24 25 """ 26 __author__ = "Mark Granroth-Wilding <mark.granroth-wilding@ed.ac.uk>" 27

def strs(list, sep=", "):
    """
    Join the string forms of the items of an iterable into one string.

    Each item is formatted with ``%s`` and the results are joined with
    ``sep``. This is a shortcut for printing lists readably.

    @param list: iterable of items to format (the parameter name shadows
        the builtin, but is kept so that keyword callers keep working)
    @param sep: separator placed between consecutive items
    @return: the joined string; the empty string for an empty iterable

    """
    # Wrap each item in a 1-tuple: a bare tuple on the right of % would be
    # taken as the whole argument list, so (1, 2) would raise a TypeError
    # and (3,) would be rendered as "3"
    return sep.join(["%s" % (item,) for item in list])

37

def fmt_prob(prob, prec=4):
    """
    Format a number as a string in a scientific style suitable for
    displaying probabilities, e.g. C{0.5} becomes C{"5.000e-1"}.

    The value is rounded to C{prec} significant digits. Zero is formatted
    with a zero exponent (C{"0.000e0"}) and non-finite values (NaN,
    infinities) are returned as Decimal's own string form.

    This is not a particularly quick procedure. If you need to format
    lots of probabilities, it's probably best to do something cruder.

    @param prob: the value to format; anything whose C{str()} is accepted
        by C{decimal.Decimal}
    @param prec: number of significant digits to show
    @return: the formatted string

    """
    from decimal import Decimal

    # Convert via str() so that the Decimal holds the short printed form of
    # a float rather than its exact binary expansion
    value = Decimal(str(prob))
    if not value.is_finite():
        # NaN has no digits and infinities cannot be quantized
        return str(value)

    if value.is_zero():
        # adjusted() of a zero reflects how it was written ("0" vs "0.0"),
        # not a magnitude, so build the digits directly
        digits = (0,) * max(prec, 1)
        exponent = 0
    else:
        # Quantize the value to the correct precision
        quant = Decimal((0, (1,), value.adjusted() - prec + 1))
        rounded = value.quantize(quant)
        if rounded.adjusted() != value.adjusted():
            # Rounding carried into a new leading digit (0.99999 -> 1.0000),
            # which leaves one digit too many: quantize again at the new
            # magnitude
            quant = Decimal((0, (1,), rounded.adjusted() - prec + 1))
            rounded = rounded.quantize(quant)
        digits = rounded.as_tuple().digits
        exponent = rounded.adjusted()

    # Format it by hand: Decimal's own scientific string switches between
    # plain and exponent notation depending on the magnitude
    sign = "-" if value.is_signed() else ""
    fraction = "".join(["%d" % dig for dig in digits[1:]])
    return "%s%d.%se%d" % (sign, digits[0], fraction, exponent)

57

def group_numerical_suffixes(inlist, open_brace="{", close_brace="}"):
    """
    Handy utility for concise readable output of a list of names that
    includes many that differ only by a numerical suffix.
    For example, ['model0','model1','model2'] is better represented
    as 'model{0-2}'.

    Given a list of items, return a potentially smaller list, with all
    names differing only by a numerical suffix condensed into a single
    item, using braces to denote the suffix and using ranges where
    possible, otherwise comma-separated lists.

    Names without a numerical suffix come first in the output, in input
    order, followed by the numbered names in order of first appearance.
    A name that has nothing to group with is returned exactly as given,
    including any leading zeros in its suffix. Within a group the suffixes
    are shown as integers, so leading zeros are not preserved there.

    @param inlist: iterable of name strings
    @param open_brace: string placed before the grouped suffixes
    @param close_brace: string placed after the grouped suffixes
    @return: list of condensed names

    """
    import re
    # Look for everything that ends in pure numbers. The name part is
    # non-greedy so that the number part takes the whole run of digits.
    num_end = re.compile(r'^(?P<name>.*?)(?P<number>\d+)$')

    outlist = []
    # Maps name stem -> list of (suffix value, original full name)
    suffixes = {}
    # Stems in order of first appearance, so the output order does not
    # depend on dict ordering
    stem_order = []

    for full_name in inlist:
        found = num_end.match(full_name)
        if found is None:
            # Can't group this in any way
            outlist.append(full_name)
            continue
        stem = found.group('name')
        if stem not in suffixes:
            suffixes[stem] = []
            stem_order.append(stem)
        suffixes[stem].append((int(found.group('number')), full_name))

    for stem in stem_order:
        entries = suffixes[stem]
        if len(entries) == 1:
            # Nothing to group with: give back the name untouched rather
            # than rebuilding it from the parsed integer, which would drop
            # leading zeros
            outlist.append(entries[0][1])
            continue

        # Perform the grouping
        nums = sorted(num for num, __ in entries)
        ranges = []
        range_start = range_end = nums[0]
        # The trailing None makes the loop run once more after the last
        # number so that the final range gets flushed
        for num in nums[1:] + [None]:
            if num is not None and num == range_end + 1:
                # Extend the current range
                range_end = num
                continue
            # End of range
            if range_start == range_end:
                # Lonely number: no range
                ranges.append("%s" % range_start)
            else:
                ranges.append("%s-%s" % (range_start, range_end))
            range_start = range_end = num
        outlist.append("%s%s%s%s" % (stem, open_brace, ",".join(ranges),
                                     close_brace))
    return outlist

115

def make_unique(strings, separator=""):
    """
    Ensures that there are no duplicate strings in a list of strings.
    Wherever a duplicate is found, it is distinguished by appending an
    integer, counting up from 0.

    The first occurrence of every string is kept as it is. A later
    occurrence gets the separator and the lowest unused integer appended.
    Suffix values that would reproduce a string already in the input, or a
    name generated earlier, are skipped, so the result never contains
    duplicates.

    @param strings: iterable of (hashable) strings
    @param separator: string placed between a duplicate and its integer
    @return: list of the same length as the input with no repeated values

    """
    strings = list(strings)
    # Every input string is reserved up front, so that a generated name
    # (e.g. "a0" for a repeated "a") can never clash with a real "a0"
    taken = set(strings)
    # Maps each string seen so far to the next suffix value to try
    next_suffix = {}
    unique = []
    for string in strings:
        if string not in next_suffix:
            # First occurrence: keep unchanged
            unique.append(string)
            next_suffix[string] = 0
            continue
        suffix = next_suffix[string]
        candidate = "%s%s%d" % (string, separator, suffix)
        while candidate in taken:
            suffix += 1
            candidate = "%s%s%d" % (string, separator, suffix)
        taken.add(candidate)
        next_suffix[string] = suffix + 1
        unique.append(candidate)
    return unique

132

def strip_accents(string):
    """
    Given a unicode string, which may contain accented characters,
    returns the string with the accents removed.

    The text is decomposed (Unicode normalization form NFD) and every
    nonspacing combining mark (category C{Mn}) is dropped. Characters that
    have no decomposition into a base character plus marks are left as
    they are.

    @param string: the text to process; it is converted to the
        interpreter's unicode text type first
    @return: the text without combining marks

    """
    import unicodedata
    # type(u"") is unicode on Python 2 and str on Python 3, so this works
    # on both without naming the Python 2-only "unicode" builtin
    text_type = type(u"")
    decomposed = unicodedata.normalize('NFD', text_type(string))
    return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')

# Lower-case strings that str_to_bool() interprets as True
TRUE_STRINGS = ["true", "t", "1", "yes", "on", "hellyeah"]


def str_to_bool(string):
    """
    Interprets the string as a boolean. Normal Python behaviour for converting
    a str to a bool is to return False for the empty string and True for
    everything else. This function interprets a load of sensible true values
    as True and everything else as False.

    Surrounding whitespace is ignored.

    Strings considered true (case insensitive): %s.

    @param string: the string to interpret
    @return: True if the string is one of the true values, False otherwise

    """
    return string.strip().lower() in TRUE_STRINGS

# A string literal followed by "%" is an ordinary expression, not a
# docstring, so the list of true values is filled in after the definition.
# The docstring is None when Python runs with -OO.
if str_to_bool.__doc__ is not None:
    str_to_bool.__doc__ = str_to_bool.__doc__ % ", ".join(TRUE_STRINGS)

156

def slugify(value):
    """
    Normalizes string: removes every character that is not a word
    character, whitespace or a hyphen, strips surrounding whitespace,
    converts to lowercase and replaces each run of whitespace with a
    single underscore.

    Adapted from Django's slugify function, but joining words with
    underscores instead of hyphens.

    @param value: the string to convert
    @return: the slug

    """
    import re
    # Raw strings: \w and \s are not valid escapes in a plain string literal
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    value = re.sub(r'[\s]+', '_', value)
    return value

169

Source Code for Module jazzparser.utils.strings