NLI Parameters
The .py file that sits in a configuration directory specifies the parameter settings of an NLI. These are all commented in the file. For example, here is the initial .py file created for a standalone configuration:
# *** Time and Size Limits *** cphrase.nlu_timeout = 5 # The maximum number of seconds allowed for analysis of a user utterance cphrase.row_display_limit = None # The maximum number of rows displayed in table result cphrase.value_display_limit = None # The maximum number of values displayed in list result # *** Logging Behaviour *** cphrase.log_to_file = False # print log messages to .log file cphrase.log_sql = False # log all SQL queries/answers to/from database cphrase.log_nlu = False # deeply log natural language analysis cphrase.log_sql_idm = True # Log all SQL inserts, deletes and updates to special file. # *** NLU Processing Flags *** # word-ordering sensitivity *** cphrase.action_can_match_any_table = True # SELECT *, UPDATE and DELETE can match any table in utterance cphrase.action_can_match_any_column = True # SELECT can match any column in utterance cphrase.operator_can_match_any_table = True # NOT, EVERY, etc can match any table in utterance cphrase.operator_can_match_any_column = True # GROUP BY, ORDER BY, etc. can match any attribute in utterance cphrase.value_can_match_any_column = True # Values (e.g. 'red',12,etc) can match any column in utterance # recognizers date_recognition = True # Attempts to convert spans like '3 days ago' into a date integer_recognition = False # Attempts to convert spans like '12 million' to integers # *** SQL Semantics *** cphrase.self_joins = True # Can the generated SQL include multiple aliased mentions of the same table. cphrase.set_semantics = True # reduce tuple answers to sets. cphrase.allow_dangling_variables = True # Rule out empty foreign key following cphrase.foreign_keys_selectable = False # Are foreign keys able to be selected cphrase.default_column_select = True # are columns, by default, individually SELECTable in the SQL sense. cphrase.allow_relationship_elipsis = True # Extra relationships can be added to support all interpretations. 
# *** Answer Presentation *** cphrase.rows_displayed_per_page = 10 # The maximum number of rows displayed for each page cphrase.update_tuple = True # Does the user get the chance to update tuples in relation selects. cphrase.generate_fk_names = True # Does this system attempt to reach to generate full names for foreign keys in SQL cphrase.sql_names = False # present SQL literals for the names of relations and attributes. cphrase.always_offer_csv_exports = True # always generate .csv files for table answers. cphrase.render_html_links_in_answers = True # Should be turned on if answers have well-formed HTML. cphrase.data_column_width_limit = 300 # Maximum size of string presented in tables. This needs to be high for HTML links. # *** Miscellaneous *** cphrase.isolation_level = cphrase.AUTOCOMMIT # Can be READ_COMMITTED, REPEATABLE_READ or SERIALIZABLE cphrase.default_focus_table = None # The default relation users are asking about. For example 'items' in e-commerce. cphrase.basket_table = None # The relation that has an e-commerce basket. cphrase.basket_key_column = None # The attribute that is key for items in basket. Should be a primary key. cphrase.generate_stars = False # Should all generated SQL over explorer be '*' selects. cphrase.exclude_empty_columns = False # run analysis at start-up to exclude columns from table answers. # *** Open AI *** cphrase.open_ai = False # Does the user have option to ask OpenAI's NL->SQL translator? 
cphrase.openai_api_key = 'Contact OpenAI for key' # Key for access to the OpenAI API cphrase.open_ai_command = "SELECT" # Lead command in queries to OpenAI cphrase.open_ai_engine = "text-davinci-002" # Engine cphrase.open_ai_preamble = "" # Additional text prepended to the user's question # *** Lexicon *** # Default column condition properties cphrase.default_column_equalities = True cphrase.default_column_inequalities = True cphrase.default_column_comparisons = True cphrase.default_column_sets = True cphrase.default_column_superlatives = True cphrase.default_column_text_like = True cphrase.default_column_memo_like = True cphrase.default_column_nulls = True cphrase.default_column_quotes = True cphrase.default_column_grouping = True cphrase.default_column_aggregation = True # Default column reference properties cphrase.default_column_materialization = True cphrase.default_column_completions = True cphrase.default_text_value_stands_for_column = True cphrase.default_numeric_value_stands_for_column = True cphrase.default_temporal_value_stands_for_column = True cphrase.plural_values = True # Generate plural values as a part of lexicon cphrase.plural_lexes = True # Generate default plural forms for tables and columns cphrase.columns_stand_for_tables = True # Can columns in a select stand independent of table mention? 
# Function words cphrase.lex = {'AFTER': ['after', 'later than'], 'AGO': ['ago', 'earlier', 'in past', 'past'], 'AND': ['and'], 'ANSWER_NUMBER': ['Number:'], 'AT_LEAST_CARD': ['at least'], 'AT_MOST_CARD': ['at most'], 'AVERAGE': ['average'], 'BE': ['is', 'does', 'do'], 'BEFORE': ['before', 'earlier than'], 'CLAUSE_MARKERS': ['that', 'which', 'who'], 'COUNT': ['how many', 'number', 'count', 'total'], 'DATE': ['date', 'day'], 'DAY': ['days', 'day'], 'DECREASING_ORDER_BY': ['decreasing'], 'DELETE': ['delete', 'remove', 'drop', 'forget'], 'EQUAL_TO': ['equal to', '=', 'exactly'], 'EVERY': ['every', 'always'], 'EXACTLY_CARD': ['exactly'], 'EXISTS': ['exists'], 'FALSE': ['false'], 'FUTURE': ['from now', 'in future', 'future'], 'GREATER_THAN': ['more than', 'greater than', 'more', 'over', '>'], 'GROUP_BY': ['group by', 'grouped by', 'group', 'by', 'per'], 'HOUR': ['hour', 'hours'], 'INCREASING_ORDER_BY': ['increasing'], 'INSERT': ['insert', 'add'], 'LESS_CARD': ['less than'], 'LESS_THAN': ['less than', 'less', 'fewer than', 'under', '<'], 'MATCHING': ['containing', 'like', 'matching', 'contains', 'contain', 'including', 'includes'], 'MAX': ['maximum', 'max'], 'MAX_NUMERICAL': ['largest', 'greatest', 'biggest', 'highest'], 'MAX_TIME': ['latest'], 'MIN': ['minimum', 'min'], 'MINUTE': ['minutes', 'min', 'minute'], 'MIN_NUMERICAL': ['smallest', 'lowest', 'least'], 'MIN_TIME': ['earliest', 'least recent', 'oldest'], 'MONTH': ['month', 'months'], 'MORE_CARD': ['more than'], 'NOT': ['not', 'no', 'without', 'non'], 'NOT_EQUAL_TO': ['not equal to', '<>', 'not equal', 'not exactly', 'not'], 'NOT_EXACTLY_CARD': ['not exactly'], 'NOT_NULL': ['not null', 'provided', 'defined'], 'NOT_VALUE': ['not'], 'NOW': ['now', 'just now'], 'NO_ANSWERS': ['The database does not contain:'], 'NULL': ['unknown', 'null', 'nothing', 'no value', 'without', 'missing', 'no'], 'OF': ['of'], 'OR': ['or'], 'ORDER_BY': ['order by', 'order on', 'ordered by', 'ordered on', 'sort by', 'sort on', 'sorted by', 
'sorted on', 'order'], 'STOP': ['a', 'about', 'all', 'an', 'and', 'any', 'are', 'be', 'by', 'can', 'could', 'do', 'does', 'did', 'each', 'exist', 'for', 'from', 'has', 'have', 'i', 'in', 'into', 'is', 'it', 'me', 'me', 'much', 'of', 'on', 'or', 'other', 'please', 'show', 'tell', 'to', 'that', 'the', 'them', 'there', 'those', 'through', 'umm', 'was', 'where', 'which', 'whose', 'with', 'would', 'you'], 'SUM': ['total', 'sum', 'combined'], 'TO': ['to'], 'TODAY': ['today'], 'TOMORROW': ['tomorrow'], 'TOTALLING_EXACTLY': ['totalling exactly'], 'TOTALLING_OVER': ['totalling over'], 'TOTALLING_UNDER': ['totalling under'], 'TRUE': ['true'], 'UPDATE': ['set', 'update', 'change', 'modify', 'edit'], 'WEEK': ['weeks', 'week'], 'WH': ['what', 'which', 'give', 'tell', 'list', 'show', 'select'], 'WITH': ['with'], 'YEAR': ['year', 'years'], 'YESTERDAY': ['yesterday']}