NLI Parameters

The .py file that sits in a configuration directory specifies the parameter settings of an NLI. Each setting is commented in the file. For example, here is the initial .py file that is created for a standalone configuration:

# *** Time and Size Limits ***

# Maximum number of seconds allowed for the analysis of a user utterance.
cphrase.nlu_timeout = 5
# Maximum number of rows displayed in a table result.
cphrase.row_display_limit = None
# Maximum number of values displayed in a list result.
cphrase.value_display_limit = None

# *** Logging Behaviour ***

# Print log messages to a .log file.
cphrase.log_to_file = False
# Log all SQL queries/answers sent to and received from the database.
cphrase.log_sql = False
# Deeply log the natural language analysis.
cphrase.log_nlu = False
# Log all SQL inserts, deletes and updates to a special file.
cphrase.log_sql_idm = True

# *** NLU Processing Flags ***

# Word-ordering sensitivity

# SELECT *, UPDATE and DELETE can match any table in the utterance.
cphrase.action_can_match_any_table = True
# SELECT can match any column in the utterance.
cphrase.action_can_match_any_column = True
# NOT, EVERY, etc. can match any table in the utterance.
cphrase.operator_can_match_any_table = True
# GROUP BY, ORDER BY, etc. can match any attribute in the utterance.
cphrase.operator_can_match_any_column = True
# Values (e.g. 'red', 12, etc.) can match any column in the utterance.
cphrase.value_can_match_any_column = True

# Recognizers
#
# NOTE(review): unlike the cphrase.* settings above, the two recognizer flags
# are assigned as bare module-level names. Confirm whether they are meant to
# be cphrase.date_recognition / cphrase.integer_recognition.

# Attempt to convert spans like '3 days ago' into a date.
date_recognition = True
# Attempt to convert spans like '12 million' into integers.
integer_recognition = False

# *** SQL Semantics ***

# Whether the generated SQL can include multiple aliased mentions of the
# same table.
cphrase.self_joins = True
# Reduce tuple answers to sets.
cphrase.set_semantics = True
# Governs whether empty foreign key following is ruled out.
# NOTE(review): the comment shipped with this setting reads "Rule out empty
# foreign key following", which sounds like the opposite of "allow" in the
# setting name. Confirm which value rules it out.
cphrase.allow_dangling_variables = True
# Whether foreign keys can be selected.
cphrase.foreign_keys_selectable = False
# Whether columns are, by default, individually SELECTable in the SQL sense.
cphrase.default_column_select = True
# Extra relationships can be added to support all interpretations.
cphrase.allow_relationship_elipsis = True

# *** Answer Presentation ***

# Maximum number of rows displayed on each page.
cphrase.rows_displayed_per_page = 10
# Whether the user gets the chance to update tuples in relation selects.
cphrase.update_tuple = True
# Whether the system attempts to generate full names for foreign keys in SQL.
cphrase.generate_fk_names = True
# Present SQL literals for the names of relations and attributes.
cphrase.sql_names = False
# Always generate .csv files for table answers.
cphrase.always_offer_csv_exports = True
# Should be turned on if answers contain well-formed HTML.
cphrase.render_html_links_in_answers = True
# Maximum size of a string presented in tables. This needs to be high for
# HTML links.
cphrase.data_column_width_limit = 300

# *** Miscellaneous ***

# Can also be READ_COMMITTED, REPEATABLE_READ or SERIALIZABLE.
cphrase.isolation_level = cphrase.AUTOCOMMIT
# The default relation users are asking about, for example 'items' in
# e-commerce.
cphrase.default_focus_table = None
# The relation that holds an e-commerce basket.
cphrase.basket_table = None
# The attribute that is the key for items in the basket. Should be a primary
# key.
cphrase.basket_key_column = None
# Whether all generated SQL over the explorer should be '*' selects.
cphrase.generate_stars = False
# Run an analysis at start-up to exclude columns from table answers.
cphrase.exclude_empty_columns = False

# *** Open AI ***

# Whether the user has the option to ask OpenAI's NL->SQL translator.
cphrase.open_ai = False
# Key for access to the OpenAI API.
cphrase.openai_api_key = 'Contact OpenAI for key'
# Lead command in queries to OpenAI.
cphrase.open_ai_command = "SELECT"
# Engine.
cphrase.open_ai_engine = "text-davinci-002"
# Additional text prepended to the user's question.
cphrase.open_ai_preamble = ""

# *** Lexicon ***

# Default column condition properties

cphrase.default_column_equalities = True
cphrase.default_column_inequalities = True
cphrase.default_column_comparisons = True
cphrase.default_column_sets = True
cphrase.default_column_superlatives = True
cphrase.default_column_text_like = True
cphrase.default_column_memo_like = True
cphrase.default_column_nulls = True
cphrase.default_column_quotes = True
cphrase.default_column_grouping = True
cphrase.default_column_aggregation = True

# Default column reference properties

cphrase.default_column_materialization = True
cphrase.default_column_completions = True
cphrase.default_text_value_stands_for_column = True
cphrase.default_numeric_value_stands_for_column = True
cphrase.default_temporal_value_stands_for_column = True
# Generate plural values as a part of the lexicon.
cphrase.plural_values = True
# Generate default plural forms for tables and columns.
cphrase.plural_lexes = True
# Whether columns in a select can stand independent of a table mention.
cphrase.columns_stand_for_tables = True

# Function words

# Maps each upper-case key to the list of phrases given for it.
#
# NOTE(review): the lists for 'MATCHING', 'ORDER_BY' and 'STOP' were left
# unclosed, which made the whole literal a syntax error. Each is closed here
# after its last visible element. Further phrases, and possibly whole keys
# between 'ORDER_BY' and 'STOP', may have been lost; confirm against a
# freshly generated configuration file.
cphrase.lex = {
    'AFTER': ['after', 'later than'],
    'AGO': ['ago', 'earlier', 'in past', 'past'],
    'AND': ['and'],
    'ANSWER_NUMBER': ['Number:'],
    'AT_LEAST_CARD': ['at least'],
    'AT_MOST_CARD': ['at most'],
    'AVERAGE': ['average'],
    'BE': ['is', 'does', 'do'],
    'BEFORE': ['before', 'earlier than'],
    'CLAUSE_MARKERS': ['that', 'which', 'who'],
    'COUNT': ['how many', 'number', 'count', 'total'],
    'DATE': ['date', 'day'],
    'DAY': ['days', 'day'],
    'DECREASING_ORDER_BY': ['decreasing'],
    'DELETE': ['delete', 'remove', 'drop', 'forget'],
    'EQUAL_TO': ['equal to', '=', 'exactly'],
    'EVERY': ['every', 'always'],
    'EXACTLY_CARD': ['exactly'],
    'EXISTS': ['exists'],
    'FALSE': ['false'],
    'FUTURE': ['from now', 'in future', 'future'],
    'GREATER_THAN': ['more than', 'greater than', 'more', 'over', '>'],
    'GROUP_BY': ['group by', 'grouped by', 'group', 'by', 'per'],
    'HOUR': ['hour', 'hours'],
    'INCREASING_ORDER_BY': ['increasing'],
    'INSERT': ['insert', 'add'],
    'LESS_CARD': ['less than'],
    'LESS_THAN': ['less than', 'less', 'fewer than', 'under', '<'],
    'MATCHING': ['containing'],  # possibly truncated, see note above
    'MAX': ['maximum', 'max'],
    'MAX_NUMERICAL': ['largest', 'greatest', 'biggest', 'highest'],
    'MAX_TIME': ['latest'],
    'MIN': ['minimum', 'min'],
    'MINUTE': ['minutes', 'min', 'minute'],
    'MIN_NUMERICAL': ['smallest', 'lowest', 'least'],
    'MIN_TIME': ['earliest', 'least recent', 'oldest'],
    'MONTH': ['month', 'months'],
    'MORE_CARD': ['more than'],
    'NOT': ['not', 'no', 'without', 'non'],
    'NOT_EQUAL_TO': ['not equal to', '<>', 'not equal', 'not exactly', 'not'],
    'NOT_EXACTLY_CARD': ['not exactly'],
    'NOT_NULL': ['not null', 'provided', 'defined'],
    'NOT_VALUE': ['not'],
    'NOW': ['now', 'just now'],
    'NO_ANSWERS': ['The database does not contain:'],
    'NULL': ['unknown', 'null', 'nothing', 'no value', 'without', 'missing', 'no'],
    'OF': ['of'],
    'OR': ['or'],
    'ORDER_BY': [  # possibly truncated, see note above
        'order by',
        'order on',
        'ordered by',
        'ordered on',
        'sort by',
        'sort on',
        'sorted by',
        'sorted on',
    ],
    'STOP': ['a'],  # possibly truncated, see note above
    'SUM': ['total', 'sum', 'combined'],
    'TO': ['to'],
    'TODAY': ['today'],
    'TOMORROW': ['tomorrow'],
    'TOTALLING_EXACTLY': ['totalling exactly'],
    'TOTALLING_OVER': ['totalling over'],
    'TOTALLING_UNDER': ['totalling under'],
    'TRUE': ['true'],
    'UPDATE': ['set', 'update', 'change', 'modify', 'edit'],
    'WEEK': ['weeks', 'week'],
    'WH': ['what', 'which', 'give', 'tell', 'list', 'show', 'select'],
    'WITH': ['with'],
    'YEAR': ['year', 'years'],
    'YESTERDAY': ['yesterday'],
}