jazzparser.settings

Source Code for Module jazzparser.settings

"""Global settings for the Jazz Parser.

This module is imported by other modules to access global settings.
"""

import os

# ---------------------------------------------------------------------------
# Directory layout
# ---------------------------------------------------------------------------

# Absolute path two directory levels above the directory holding this file.
PROJECT_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "..")
)

# Source code tree (absolute path).
SOURCE_DIR = os.path.join(PROJECT_ROOT, "src")

# Statistical model data.
MODEL_DATA_DIR = os.path.join(PROJECT_ROOT, "etc", "data")
# PCFG model data is kept in a subdirectory of the model data directory.
PCFG_MODEL_DATA_DIR = os.path.join(MODEL_DATA_DIR, "pcfg")
# Backoff model data has its own location, outside MODEL_DATA_DIR.
BACKOFF_MODEL_DATA_DIR = os.path.join(PROJECT_ROOT, "etc", "backoff_data")

# XML grammar definitions.
GRAMMAR_DATA_DIR = os.path.join(PROJECT_ROOT, "grammars")
# Config files and local data.
LOCAL_DATA_DIR = os.path.join(PROJECT_ROOT, "etc", "local")
# General-purpose temporary directory.
TEMP_DIR = os.path.join(PROJECT_ROOT, "etc", "tmp")
# External corpora stored inside the project.
CORPORA_DIR = os.path.join(PROJECT_ROOT, "input", "corpora")
# Tonal space analysis sets.
ANALYSIS_DATA_DIR = os.path.join(PROJECT_ROOT, "etc", "analyses")
# Data used by the unit tests.
TEST_DATA_DIR = os.path.join(PROJECT_ROOT, "etc", "test")
# Saved shell states.
SHELL_STATE_DIR = os.path.join(PROJECT_ROOT, "etc", "shell_state")
# Release builds are produced inside the temporary directory.
RELEASE_BUILD_DIR = os.path.join(TEMP_DIR, "release")

# ---------------------------------------------------------------------------
# Version
# ---------------------------------------------------------------------------

# Current version ID of the software, taken from the enclosing package.
from . import __version__
CURRENT_VERSION = __version__

# ---------------------------------------------------------------------------
# Defaults
# ---------------------------------------------------------------------------

# Name of the grammar used when none is given on the command line.
DEFAULT_GRAMMAR = "jazz3.0"

# Supertagger type used by default.
DEFAULT_SUPERTAGGER = "full"

# Grammar infrastructure (formalism) used by default. The grammar's XML
# normally specifies this, so the value here rarely matters; it is the
# fallback when the XML omits it, and some tools assume it when they have
# no other information.
DEFAULT_FORMALISM = "music_halfspan"

# Parser algorithm module used by default.
DEFAULT_PARSER = "cky"

# Substrings stripped from input sequences supplied in plain text format.
IGNORED_INPUT_STRINGS = ["|"]

# Whether to emit warnings during derivation when the semantics contains
# free variables.
WARN_ABOUT_FREE_VARS = False

# ---------------------------------------------------------------------------
# History files (all stored in the local data directory)
# ---------------------------------------------------------------------------

# Interactive shell history.
SHELL_HISTORY_FILE = os.path.join(LOCAL_DATA_DIR, "shell-history")
# Input prompt history.
INPUT_PROMPT_HISTORY_FILE = os.path.join(LOCAL_DATA_DIR, "input-history")
# Tagger test input loop history.
TAG_PROMPT_HISTORY_FILE = os.path.join(LOCAL_DATA_DIR, "tag-history")
# Ngram query script history.
NGRAM_QUERY_HISTORY_FILE = os.path.join(LOCAL_DATA_DIR, "ngram-history")

class OPTIONS:
    """Output-related defaults, which command-line options may override."""

    # When True, output times on all semantic objects; otherwise only on TDs.
    OUTPUT_ALL_TIMES = False

    # Switches output between LaTeX (True) and plain text (False).
    OUTPUT_LATEX = False

    # Output options defined by each formalism, keyed by formalism name.
    # Starts out empty.
    OUTPUT = dict()

76

class CANDC:
    """Paths and default training parameters for C&C."""

    # Base directory of C&C within the project tree.
    BASE_PATH = os.path.join(PROJECT_ROOT, "lib", "candc")

    # Directory holding C&C model data.
    MODELS_PATH = os.path.join(PROJECT_ROOT, "etc", "candc_data")

    # Default training parameters. Every value is given as a string.
    DEFAULT_TRAINING_PARAMS = {
        "super-category_cutoff": "0",
        "super-cutoff_default": "0",
        "super-cutoff_words": "0",
        # Disabled: 'super-rare_cutoff' : '20' (this makes no difference)
        "super-tagdict_min": "0",
        "super-tagdict_ratio": "10000",
        "super-beam_width": "20",
        "super-beam_ratio": "0.0",
        # Disabled: 'super-model_sigma' : '0.9'
        "sigma": "0.85",
    }

    # Directory for C&C log files.
    LOG_DIRECTORY = os.path.join(PROJECT_ROOT, "etc", "log", "candc")

93

class PCFG_PARSER:
    """Default parsing and training settings for the PCFG parser.

    Every value here can be overridden by the corresponding module option.
    """

    # ----- Default settings -----

    # Module option: threshold
    # Maximum ratio allowed between the highest and lowest probability in a
    # cell; beyond this, the low-probability end is discarded.
    DEFAULT_THRESHOLD = 0.01

    # Module option: maxarc
    # Maximum number of signs kept in a cell; beyond this, the
    # lower-probability signs are discarded.
    DEFAULT_MAX_ARC_SIZE = 20

    # ----- Default training options -----

    # Module option: cat_bins
    # Number of possible categories to reserve probability mass for when
    # smoothing. Intended to be the number of theoretically possible
    # categories in halfspan.
    # TODO: This is an overestimate, it should be 1296
    # TODO: Try the model with 1296 instead
    DEFAULT_CAT_BINS = 4752

115

class TEST:
    """
    Common constants and settings used by the unit tests (see L{jptests}).

    """

    # Path of the "fullseqs" entry under the project's input directory.
    SEQUENCE_DATA = os.path.join(PROJECT_ROOT, "input", "fullseqs")

122