Package jazzparser :: Module settings
[hide private]
[frames | no frames]

Source Code for Module jazzparser.settings

  1  """Global settings for the Jazz Parser. 
  2   
  3  This module is imported by other modules to access global settings. 
  4  """ 
  5   
  6  import os 
  7  PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) 
  8  # Where all the source code lives (absolute path) 
  9  SOURCE_DIR = os.path.join(PROJECT_ROOT, "src") 
 10  # Where the statistical model data lives 
 11  MODEL_DATA_DIR = os.path.join(PROJECT_ROOT, "etc", "data") 
 12  # Model data for PCFG models 
 13  PCFG_MODEL_DATA_DIR = os.path.join(MODEL_DATA_DIR, "pcfg") 
 14  # A slightly different location for backoff model data 
 15  BACKOFF_MODEL_DATA_DIR = os.path.join(PROJECT_ROOT, "etc", "backoff_data") 
 16  # Where the XML grammar definitions live 
 17  GRAMMAR_DATA_DIR = os.path.join(PROJECT_ROOT, "grammars") 
 18  # Where config files and local data are stored 
 19  LOCAL_DATA_DIR = os.path.join(PROJECT_ROOT, "etc", "local") 
 20  # Generic temporary directory for any purpose 
 21  TEMP_DIR = os.path.join(PROJECT_ROOT, "etc", "tmp") 
 22  # Where external corpora are stored within the project 
 23  CORPORA_DIR = os.path.join(PROJECT_ROOT, "input", "corpora") 
 24  # Where tonal space analysis sets live 
 25  ANALYSIS_DATA_DIR = os.path.join(PROJECT_ROOT, "etc", "analyses") 
 26  # Where data for unit tests is stored 
 27  TEST_DATA_DIR = os.path.join(PROJECT_ROOT, "etc", "test") 
 28  # Where shell states should be stored 
 29  SHELL_STATE_DIR = os.path.join(PROJECT_ROOT, "etc", "shell_state") 
 30  # Where releases are built 
 31  RELEASE_BUILD_DIR = os.path.join(TEMP_DIR, "release") 
 32   
 33  # The version ID of the software currently 
 34  from . import __version__ 
 35  CURRENT_VERSION = __version__ 
 36   
 37  # Sets the name of the default grammar that will be used if none other is 
 38  #  specified on the command line. 
 39  DEFAULT_GRAMMAR = "jazz3.0" 
 40   
 41  # The type of supertagger that should be used by default 
 42  DEFAULT_SUPERTAGGER = 'full' 
 43   
 44  # The grammar infrastructure to use by default 
 45  # This doesn't usually make a difference, since it's specified by the grammar (in the XML) 
 46  # If it's omitted from the XML this will be used, and some tools will assume  
 47  #  the default formalism if no other information is available 
 48  DEFAULT_FORMALISM = 'music_halfspan' 
 49   
 50  # The parser algorithm module to use by default 
 51  DEFAULT_PARSER = 'cky' 
 52   
 53  # Substrings that are stripped from input sequences given in plain text format 
 54  IGNORED_INPUT_STRINGS = [ "|", ] 
 55   
 56  # Output warnings during derivation if there are free variables in the semantics 
 57  WARN_ABOUT_FREE_VARS = False 
 58   
 59  # File to save the interactive shell's history to 
 60  SHELL_HISTORY_FILE = os.path.join(LOCAL_DATA_DIR, "shell-history") 
 61  # File to save the input prompt's history to 
 62  INPUT_PROMPT_HISTORY_FILE = os.path.join(LOCAL_DATA_DIR, "input-history") 
 63  # File to save the tagger test input loop's history to 
 64  TAG_PROMPT_HISTORY_FILE = os.path.join(LOCAL_DATA_DIR, "tag-history") 
 65  # Ngram query script history 
 66  NGRAM_QUERY_HISTORY_FILE = os.path.join(LOCAL_DATA_DIR, "ngram-history") 
 67   
class OPTIONS:
    """Namespace of output defaults that command-line options may override."""
    ## These are defaults that may be overridden by cmd line opts
    # Global flag to decide whether to output times on all semantic objects
    #  or just TDs
    OUTPUT_ALL_TIMES = False
    # Global flag to toggle between Latex and text output
    OUTPUT_LATEX = False
    # Output options that each formalism defines, indexed by formalism name.
    # Starts out empty.
    OUTPUT = {}
76
class CANDC:
    """Namespace of paths and default training parameters for C&C."""
    # Locations under the project root: the C&C library itself and the
    # directory for its models
    BASE_PATH = os.path.join(PROJECT_ROOT, "lib", "candc")
    MODELS_PATH = os.path.join(PROJECT_ROOT, "etc", "candc_data")
    # Default training parameters. All values are given as strings.
    DEFAULT_TRAINING_PARAMS = {
        'super-category_cutoff' : '0',
        'super-cutoff_default' : '0',
        'super-cutoff_words' : '0',
        #'super-rare_cutoff' : '20', # This makes no difference
        'super-tagdict_min' : '0',
        'super-tagdict_ratio' : '10000',
        'super-beam_width' : '20',
        'super-beam_ratio' : '0.0',
        #'super-model_sigma' : '0.9',
        'sigma' : '0.85',
    }
    # Directory for C&C log output
    LOG_DIRECTORY = os.path.join(PROJECT_ROOT, "etc", "log", "candc")
93
class PCFG_PARSER:
    """Namespace of default settings for the PCFG parser."""
    ### Default settings ###
    # These can be overridden by module options
    ########################
    # Module option: threshold
    # Maximum ratio between the highest probability in a cell and the lowest
    #  before the lower end gets chucked out
    DEFAULT_THRESHOLD = 0.01
    # Module option: maxarc
    # Maximum number of signs in a cell before the lower probability ones
    #  get chucked out
    DEFAULT_MAX_ARC_SIZE = 20

    ### Default training options ###
    # These too can be overridden by module options
    ########################
    # Module option: cat_bins
    # Number of possible categories to reserve mass for when smoothing
    # This number is the number of theoretically possible categories in halfspan
    # TODO: This is an overestimate, it should be 1296
    # TODO: Try the model with 1296 instead
    DEFAULT_CAT_BINS = 4752
115
class TEST:
    """
    Settings and common constants for unit tests (see L{jptests}).

    """
    # Path built from the project root: <PROJECT_ROOT>/input/fullseqs
    SEQUENCE_DATA = os.path.join(PROJECT_ROOT, "input", "fullseqs")
122