NLI Parameters

The <configuration-name>.json file that sits in a configuration directory specifies the parameter settings of an NLI. Parameters are grouped into six types: Logging, NLU Processing, SQL, NLI, Lexicon and Open AI. Here is the initial .json file that is created for a standalone configuration:

{
  "log_to_file": true,
  "log_sql": false,
  "log_nlu": false,
  "log_sql_idm": true,
  "logo": "https://c-phrase.com/images/cp-logo.png",
  "url": "https://c-phrase.com/",
  "intro_message": "

Welcome to the NLI Portal for tt.

", "rows_displayed_per_page": 10, "always_offer_csv_exports": true, "show_sql": true, "share_link": true, "update_tuple": true, "data_column_width_limit": 200, "reshow_interval": null, "completions": true, "charts_on": true, "allow_corpus_switching": true, "table_limit": 100, "list_limit": 100, "groups_limit": 200, "distinct_on_tuple_queries": true, "bracket_tables_and_cols": false, "generate_fk_names": true, "self_joins": true, "allow_relationship_elipsis": true, "hard_foreign_keys": true, "index_everything": false, "allow_dangling_variables": true, "nlu_timeout": null, "template_fallback": false, "templates_file": "templates.templates", "mappings_limit": 200000, "command_can_bind_to_any_table": false, "command_can_bind_to_any_column": false, "operator_can_bind_to_any_table": false, "operator_can_bind_to_any_column": false, "value_can_bind_to_any_column": false, "default_column_select": true, "foreign_keys_selectable": true, "plural_columns_and_tables": false, "lex_shadows_element": false, "default_focus_table": null, "us_date_format": false, "default_column_comparisons": true, "default_column_equalities": true, "default_column_grouping": true, "default_column_inequalities": true, "default_column_text_like": true, "default_column_memo_like": true, "default_column_nulls": true, "default_column_quotes": true, "default_column_sets": true, "default_column_superlatives": true, "materialize_for_cardinalities_under": 100, "default_column_aggregation": true, "default_column_completions": true, "default_column_materialization": true, "default_plural_values": true, "default_text_value_stands_for_column": true, "default_memo_value_stands_for_column": false, "default_numeric_value_stands_for_column": true, "default_temporal_value_stands_for_column": true, "lex": { "AFTER": [ "after", "later than" ], "AGO": [ "ago", "earlier", "in past", "past", "last" ], "AND": [ "and" ], "ANSWER_NUMBER": [ "Number:" ], "AT_LEAST_CARD": [ "at least" ], "AT_MOST_CARD": [ "at most" ], "AVERAGE": [ "average" ], "BE": [ "is", "does", "do" ], "BEFORE": [ "before", "earlier than" ], "BETWEEN": [ "between", "range", "from" ], "BETWEEN_SEPERATOR": [ "and", "to", "" ], "CLAUSE_MARKERS": [ "that", "which", "who" ], "COUNT": [ "how many", "number", "count", "total" ], "DATE": [ "date", "day" ], "DAY": [ "days", "day" ], "DECREASING_ORDER_BY": [ "decreasing" ], "DELETE": [ "delete", "remove", "drop", "forget" ], "EQUAL_TO": [ "equal to", "=", "exactly" ], "EVERY": [ "every", "always" ], "EXACTLY_CARD": [ "exactly" ], "EXISTS": [ "exists" ], "FALSE": [ "false" ], "FUTURE": [ "from now", "in future", "future", "in the future", "from today" ], "GREATER_THAN": [ "more than", "greater than", "more", "over", ">" ], "GREATER_THAN_OR_EQUAL_TO": [ "greater than or equal to", ">=" ], "GROUP_BY": [ "group by", "grouped by", "group", "by", "per" ], "HOUR": [ "hour", "hours" ], "INCREASING_ORDER_BY": [ "increasing" ], "INSERT": [ "insert", "add" ], "IN_PAST": [ "since", "in past", "in last", "over past", "last", "past", "over the past", "in the last", "previous", "for last", "over last" ], "LESS_CARD": [ "less than" ], "LESS_THAN": [ "less than", "less", "fewer than", "under", "<" ], "LESS_THAN_OR_EQUAL_TO": [ "less than or equal to", "<=" ], "MATCHING": [ "containing", "like", "matching", "contains", "contain", "including", "includes" ], "MAX": [ "maximum", "max" ], "MAX_NUMERICAL": [ "largest", "greatest", "biggest", "highest" ], "MAX_TIME": [ "latest" ], "MIN": [ "minimum", "min" ], "MINUTE": [ "minute", "minutes" ], "MIN_NUMERICAL": [ "smallest", "lowest", "least" ], "MIN_TIME": [ "earliest", "least recent", "oldest" ], "MONTH": [ "month", "months" ], "MORE_CARD": [ "more than" ], "NOT": [ "not", "no", "without", "non" ], "NOT_EQUAL_TO": [ "not equal to", "<>", "not equal", "not exactly", "not" ], "NOT_EXACTLY_CARD": [ "not exactly" ], "NOT_EXISTS": [ "without", "not", "no" ], "NOT_NULL": [ "not null", "known" ], "NOT_VALUE": [ "not" ], "NOW": [ "now", "just now" ], "NO_ANSWERS": [ "The database does not contain:" ], "NULL": [ "unknown", "null" ], "OF": [ "of" ], "OR": [ "or" ], "ORDER_BY": [ "order by", "order on", "ordered by", "ordered on", "sort by", "sort on", "sorted by", "sorted on", "order", "sort" ], "STOP": [ "a", "about", "all", "an", "and", "any", "are", "be", "both", "by", "can", "could", "did", "do", "does", "done", "each", "exist", "for", "from", "has", "have", "i", "in", "into", "is", "it", "me", "me", "much", "of", "on", "or", "other", "please", "show", "tell", "to", "that", "the", "them", "there", "those", "through", "umm", "was", "were", "where", "which", "who", "whose", "with", "would", "you" ], "SUM": [ "total", "sum", "combined" ], "TO": [ "to" ], "TODAY": [ "today" ], "TOMORROW": [ "tomorrow" ], "TOTALLING_EXACTLY": [ "totalling exactly" ], "TOTALLING_OVER": [ "totalling over" ], "TOTALLING_UNDER": [ "totalling under" ], "TRUE": [ "true" ], "UPDATE": [ "set", "update", "change", "modify", "edit" ], "WEEK": [ "weeks", "week" ], "WH": [ "what", "which", "give", "tell", "list", "show", "select" ], "WITH": [ "with" ], "YEAR": [ "year", "years" ], "YESTERDAY": [ "yesterday" ] }, "batch_loader": false, "size_per_db_xml_file": 1000000, "exclude_empty_columns_at_build": false, "no_selects_via_rest": false, "date_recognition": true, "integer_recognition": true, "render_html_links_in_answers": false, "basket_table": null, "basket_key_column": null, "isolation_level": 0, "log_cdata_limit": 0, "check_number_in_generation": false, "brackets_for_target": false, "let_quotes_stand_for_tables": false, "demo_current_time": null }

Logging

These parameters control what types of logs are built as an NLI runs. These log file can be searched and processed in the edit page of the Admin Interface. The parameter log_to_file turns on logging and log_sql and log_nlu will add further details to the log. The parameter log_sql_idm logs databade modifications. When this is on, the SQL for INSERT,UPDATE and DELETE commands will be logged to a file that, if needed, can later be executed as a script. This is useful in the data quality use case of Catch-Phrase where users fix data in the database.

NLI

These parameters control aspects of the NLI. The parameter intro_message is the HTML that is displayed in the main results area when the user first enters the NLI. The parameter batch_loader determines if the user will be able to open the batch loader modal. This is useful if the user plans on pasting in material that they can then massage into insert command form to build a database (see for example this video). The parameter sql_names_in_explorer determines if just raw table names and attributes should be used in the explorer. rows_displayed_per_page is the default page size in Catch-Phrase table pagination. The parameter always_offer_csv_exports lets users always be able to download results as a CSV. The parameter show_sql determines if the user can always inspect the generated SQL. The parameter update_tuple enables, if there are write permission on a table, a button that lets users edit tuples. Finally the parameter data_column_width_limit sets the maximum visible with of text material as values in tables.

SQL

The parameters table_limit and list_limit put SQL limits on the sizes of tables and lists respectively. For larger databases it is critical that these be set. Otherwise, answers might be too large and queries will need to be canceled. The parameter groups_limit will limit the number of values on the x-axis of charts. The parameter distinct_on_tuple_queries will apply SQL distinct on table queries. The parameter bracket_tables_and_cols encloses table and column names in the SQL with the engine specific strategy to allow for tables and columns with spaces or being SQL keywords. Although it slightly clutters the generated SQL, we highly recommend that you keep this parameter set to true. The parameter generate_fk_names will instruct Catch-Phrase to expand selects of foreign keys on tables to join through and display primary values if they are declared. This gives users a much more readable experience. The parameter hard_foreign_keys determines that on database rebuilds, foreign keys will be defined. The parameter self_joins lets Catch-Phrase search for interpretations that include more than one mention of a given table in the resulting SQL. If self-joining is false, then Catch-Phrase will not search for these more complex interpretations. The advantage of that is less likelihood of spurious ambiguity. This is sufficient for many simple applications. Still for many others there is a need for SQL to be self-joining. Finally, the parameter allow_dangling_variables will allow SQL to include dangling references to tuples that strictly speaking are always true based on foreign keys.

NLU Processing

The parameters of this type control the natural language understanding process. The parameter nlu_timeout sets a total amount of time Catch-Phrase has to analyse and run a query.

You may specify the rigidity of the word ordering in the question or command. The more rigid, the less likelihood of spurious ambiguity. However, the more rigid the higher likelihood that no match is found for the user's question or command. The defaults setting is to go for rigidity. You can experiment with alternative settings, but it is likely that you will not need to change these parameters.

Lexicon

These parameters determine how Catch-Phrase builds its internal lexicon. The parameter default_column_select determines if columns are selectable by default. The parameter foreign_keys_selectable will determine if foreign keys are selectable. The parameter plural_columns_and_tables will attempt to default English plural endings on automatically generated names from tables and columns. The parameter lex_shadows_element covers a special case when you do not want the underlying table or column name to be recognized once you specify lexical material. This only rarely is required. The parameter default_focus_table lets you specify that, by default questions or commands are over a given table. This is useful in one-table domains and lets the user skip always having to mention the table name. Finally, The parameter us_date_format will use the US dates rather than the default ISO Date format.

The default condition and reference properties specify the default setting of properties on columns. These are overriden on a column by column basis in the actual column elements (see). Finally the lex property is the dictionary of function words in the underlying language. You can restrict or extend the language by editing this dictionary.