diff --git a/.github/configs/tf-cuda.json b/.github/configs/tf-cuda.json index bc5ddc487b81..4fdc2167ee19 100644 --- a/.github/configs/tf-cuda.json +++ b/.github/configs/tf-cuda.json @@ -12,6 +12,10 @@ { "TF": "2.6", "IMAGE_TAG": "cuda-11.2.0-cudnn8" + }, + { + "TF": "2.7", + "IMAGE_TAG": "cuda-11.2.0-cudnn8" } ] } diff --git a/.github/scripts/mr_publish_results.py b/.github/scripts/mr_publish_results.py index 580aad24f104..a71d74300fa6 100644 --- a/.github/scripts/mr_publish_results.py +++ b/.github/scripts/mr_publish_results.py @@ -1,4 +1,4 @@ -# Send model regression test results to Segment and Datadog +# Send model regression test results to Datadog # with a summary of all test results. # Also write them into a report file. import copy @@ -7,7 +7,6 @@ import os from typing import Any, Dict, List, Text, Tuple -import analytics from datadog_api_client.v1 import ApiClient, Configuration from datadog_api_client.v1.api.metrics_api import MetricsApi from datadog_api_client.v1.model.metrics_payload import MetricsPayload @@ -28,14 +27,6 @@ "story_report.json": "story_prediction", } -TASK_MAPPING_SEGMENT = { - "intent_report.json": "Intent Classification", - "CRFEntityExtractor_report.json": "Entity Prediction", - "DIETClassifier_report.json": "Entity Prediction", - "response_selection_report.json": "Response Selection", - "story_report.json": "Story Prediction", -} - METRICS = { "test_run_time": "TEST_RUN_TIME", "train_run_time": "TRAIN_RUN_TIME", @@ -44,7 +35,7 @@ MAIN_TAGS = { "config": "CONFIG", - "dataset": "DATASET", + "dataset": "DATASET_NAME", } OTHER_TAGS = { @@ -53,6 +44,7 @@ "accelerator_type": "ACCELERATOR_TYPE", "type": "TYPE", "index_repetition": "INDEX_REPETITION", + "host_name": "HOST_NAME", } GIT_RELATED_TAGS = { @@ -226,27 +218,6 @@ def send_to_datadog(results: List[Dict[Text, Any]]) -> None: print(response) -def _send_to_segment(context: Dict[Text, Any]) -> None: - jobID = os.environ["GITHUB_RUN_ID"] - analytics.identify( - jobID, {"name": "model-regression-tests", "created_at": datetime.datetime.now()} - ) - - analytics.track( - jobID, - "results", - { - "config_repository": CONFIG_REPOSITORY, - **prepare_datasetrepo_and_external_tags(), - **create_dict_of_env(METRICS), - **create_dict_of_env(MAIN_TAGS), - **create_dict_of_env(OTHER_TAGS), - **create_dict_of_env(GIT_RELATED_TAGS), - **context, - }, - ) - - def read_results(file: Text) -> Dict[Text, Any]: with open(file) as json_file: data = json.load(json_file) @@ -270,12 +241,6 @@ def get_result(file_name: Text, file: Text) -> Dict[Text, Any]: return result -def _push_results_to_segment(file_name: Text, file: Text) -> None: - result = get_result(file_name, file) - result["task"] = TASK_MAPPING_SEGMENT[file_name] - _send_to_segment(result) - - def send_all_to_datadog() -> None: results = [] for dirpath, dirnames, files in os.walk(os.environ["RESULT_DIR"]): @@ -286,20 +251,9 @@ def send_all_to_datadog() -> None: send_to_datadog(results) -def send_all_results_to_segment() -> None: - analytics.write_key = os.environ["SEGMENT_TOKEN"] - for dirpath, dirnames, files in os.walk(os.environ["RESULT_DIR"]): - for f in files: - if any( - f.endswith(valid_name) for valid_name in TASK_MAPPING_SEGMENT.keys() - ): - _push_results_to_segment(f, os.path.join(dirpath, f)) - analytics.flush() - - def generate_json(file: Text, task: Text, data: dict) -> dict: config = os.environ["CONFIG"] - dataset = os.environ["DATASET"] + dataset = os.environ["DATASET_NAME"] if dataset not in data: data = {dataset: {config: []}, **data} @@ -336,5 +290,4 @@ 
def create_report_file() -> None:
 if __name__ == "__main__":
     send_all_to_datadog()
-    send_all_results_to_segment()
     create_report_file()
diff --git a/.github/scripts/start_dd_agent.sh b/.github/scripts/start_dd_agent.sh
index 8f572b9071e7..8d2442cd0d92 100755
--- a/.github/scripts/start_dd_agent.sh
+++ b/.github/scripts/start_dd_agent.sh
@@ -2,6 +2,7 @@
 
 DD_API_KEY=$1
 ACCELERATOR_TYPE=$2
+NVML_INTERVAL_IN_SEC=${3:-15} # 15 seconds is the default interval
 
 # Install Datadog system agent
 DD_AGENT_MAJOR_VERSION=7 DD_API_KEY=$DD_API_KEY DD_SITE="datadoghq.eu" bash -c "$(curl -L https://s3.amazonaws.com/dd-agent/scripts/install_script.sh)"
@@ -14,7 +15,7 @@ sudo chmod 666 $DATADOG_YAML_PATH
   echo "tags:"
   echo "- service:rasa"
   echo "- accelerator_type:${ACCELERATOR_TYPE}"
-  echo "- dataset:${DATASET}"
+  echo "- dataset:${DATASET_NAME}"
   echo "- config:${CONFIG}"
   echo "- dataset_commit:${DATASET_COMMIT}"
   echo "- branch:${BRANCH}"
@@ -29,6 +30,8 @@ sudo chmod 666 $DATADOG_YAML_PATH
   echo "- workflow:${GITHUB_WORKFLOW:-none}"
   echo "- github_run_id:${GITHUB_RUN_ID:-none}"
   echo "- github_event:${GITHUB_EVENT_NAME:-none}"
+  echo "- index_repetition:${INDEX_REPETITION}"
+  echo "- host_name:${HOST_NAME}"
   echo ""
   echo "apm_config:"
   echo "  enabled: true"
@@ -41,10 +44,15 @@ sudo chmod 666 $DATADOG_YAML_PATH
 sudo mv /etc/datadog-agent/conf.d/system_core.d/conf.yaml.example /etc/datadog-agent/conf.d/system_core.d/conf.yaml
 
 if [[ "${ACCELERATOR_TYPE}" == "GPU" ]]; then
-# Install and enable NVML integration
-sudo datadog-agent integration --allow-root install -t datadog-nvml==1.0.1
-sudo -u dd-agent -H /opt/datadog-agent/embedded/bin/pip3 install grpcio pynvml
-sudo mv /etc/datadog-agent/conf.d/nvml.d/conf.yaml.example /etc/datadog-agent/conf.d/nvml.d/conf.yaml
+  # Install and enable NVML integration
+  sudo datadog-agent integration --allow-root install -t datadog-nvml==1.0.1
+  sudo -u dd-agent -H /opt/datadog-agent/embedded/bin/pip3 install grpcio pynvml
+  NVML_CONF_FPATH="/etc/datadog-agent/conf.d/nvml.d/conf.yaml"
+  sudo mv "${NVML_CONF_FPATH}.example" ${NVML_CONF_FPATH}
+  if [[ "${NVML_INTERVAL_IN_SEC}" != 15 ]]; then
+    # Append a line to the NVML config file (tee already runs under sudo, so a plain echo suffices)
+    echo "  min_collection_interval: ${NVML_INTERVAL_IN_SEC}" | sudo tee -a ${NVML_CONF_FPATH} > /dev/null
+  fi
 fi
 
 # Apply changes
diff --git a/.github/templates/model_regression_test_results_legacy.tmpl b/.github/templates/model_regression_test_results_legacy.tmpl
deleted file mode 100644
index 9d0056c7d0e4..000000000000
--- a/.github/templates/model_regression_test_results_legacy.tmpl
+++ /dev/null
@@ -1,159 +0,0 @@
-{{- /*
-
-The template reads a file with a report (the report file is available
-as an artifact in the model regression tests workflow) and returns
-a markdown table with a summary of the tests.
-
-*/ -}}
-{{- /*
-
-The print_result_nlu template returns data depends on available fields.
- -*/ -}} -{{ define "print_result_nlu" -}} -{{- if and (has (index .branch "micro avg") "f1-score") (has (index .main "micro avg") "f1-score") -}} -{{ printf "%.4f" (index (index .branch "micro avg") "f1-score") }} ({{ printf "%.2f" ((index (index .main "micro avg") "f1-score") | math.Sub (index (index .branch "micro avg") "f1-score")) }}) -{{- else if and (has .branch "accuracy") (has .main "accuracy") -}} -{{ printf "%.4f" .branch.accuracy }} ({{ printf "%.2f" (.main.accuracy | math.Sub .branch.accuracy) }}) -{{- else if and (has .branch "accuracy") (has (index .main "micro avg") "f1-score") -}} -{{ printf "%.4f" .branch.accuracy }} ({{ printf "%.2f" ((index (index .main "micro avg") "f1-score") | math.Sub .branch.accuracy) }}) -{{- else if and (has (index .branch "micro avg") "f1-score") (has .main "accuracy") -}} -{{ printf "%.4f" (index (index .branch "micro avg") "f1-score") }} ({{ printf "%.2f" (.main.accuracy | math.Sub (index (index .branch "micro avg") "f1-score")) }}) -{{- else if (has .branch "accuracy") -}} -{{ printf "%.4f" .branch.accuracy }} (`no data`) -{{- else if has (index .branch "micro avg") "f1-score" -}} -{{ printf "%.4f" (index (index .branch "micro avg") "f1-score") }} (`no data`) -{{- else -}} -`no data` -{{- end -}} -{{- end -}} -{{- /* - -The print_result_core template returns data depends on available fields. - -*/ -}} -{{ define "print_result_core_micro_avg" -}} -{{- if and (has (index .branch "micro avg") "f1-score") (has (index .main "micro avg") "f1-score") -}} -{{ printf "%.4f" (index (index .branch "micro avg") "f1-score") }} ({{ printf "%.2f" ((index (index .main "micro avg") "f1-score") | math.Sub (index (index .branch "micro avg") "f1-score")) }}) -{{- else if and (has .branch "accuracy") (has .main "accuracy") -}} -{{ printf "%.4f" .branch.accuracy }} ({{ printf "%.2f" (.main.accuracy | math.Sub .branch.accuracy) }}) -{{- else if and (has .branch "accuracy") (has (index .main "micro avg") "f1-score") -}} -{{ printf "%.4f" .branch.accuracy }} ({{ printf "%.2f" ((index (index .main "micro avg") "f1-score") | math.Sub .branch.accuracy) }}) -{{- else if and (has (index .branch "micro avg") "f1-score") (has .main "accuracy") -}} -{{ printf "%.4f" (index (index .branch "micro avg") "f1-score") }} ({{ printf "%.2f" (.main.accuracy | math.Sub (index (index .branch "micro avg") "f1-score")) }}) -{{- else if (has .branch "accuracy") -}} -{{ printf "%.4f" .branch.accuracy }} (`no data`) -{{- else if has (index .branch "micro avg") "f1-score" -}} -{{ printf "%.4f" (index (index .branch "micro avg") "f1-score") }} (`no data`) -{{- else -}} -`no data` -{{- end -}} -{{- end -}} - -{{ define "print_result_core_conversation_accuracy" -}} -{{- if and (has (index .branch "conversation_accuracy") "accuracy") (has (index .main "conversation_accuracy") "accuracy") -}} -{{ printf "%.4f" (index (index .branch "conversation_accuracy") "accuracy") }} ({{ printf "%.2f" ((index (index .main "conversation_accuracy") "accuracy") | math.Sub (index (index .branch "conversation_accuracy") "accuracy")) }}) -{{- else if has (index .branch "conversation_accuracy") "accuracy" -}} -{{ printf "%.4f" (index (index .branch "conversation_accuracy") "accuracy") }} (`no data`) -{{- else -}} -`no data` -{{- end -}} -{{- end -}} - -{{ define "print_table_nlu" }} -{{- $available_types := (index .results_for_dataset | jsonpath `@..type`) -}} -{{- if isKind "string" $available_types }}{{- $available_types = (index .results_for_dataset | jsonpath `@..type` | slice) -}}{{- end -}} -{{- if has 
$available_types "nlu" -}} -| Configuration | Intent Classification Micro F1 | Entity Recognition Micro F1 | Response Selection Micro F1 | -|---------------|-----------------|-----------------|-------------------| -{{ range $config_name, $config_data_array := .results_for_dataset -}} -{{ range $config_data := $config_data_array }} -{{- if eq $config_data.type "nlu" -}} -| `{{ $config_name }}`
test: `{{ $config_data.test_run_time }}`, train: `{{ $config_data.train_run_time }}`, total: `{{ $config_data.total_run_time }}`| -{{- if has $config_data "intent_classification" -}} -{{- $intent_class_main := dict -}} -{{- if has $.results_for_dataset_main $config_name -}} -{{- $intent_class_main = (index $.results_for_dataset_main $config_name).intent_classification -}} -{{- end -}} -{{- $intent_class := $config_data.intent_classification -}} -{{ template "print_result_nlu" (dict "branch" $intent_class "main" $intent_class_main) }}| -{{- else -}} -`no data`| -{{- end -}} -{{- if has $config_data "entity_prediction" -}} -{{- $entity_class_main := dict -}} -{{- if has $.results_for_dataset_main $config_name -}} -{{- $entity_class_main = (index $.results_for_dataset_main $config_name).entity_prediction -}} -{{- end -}} -{{- $entity_class := $config_data.entity_prediction -}} -{{ template "print_result_nlu" (dict "branch" $entity_class "main" $entity_class_main) }}| -{{- else -}} -`no data`| -{{- end -}} -{{- if has $config_data "response_selection" -}} -{{- $response_class_main := dict -}} -{{- if has $.results_for_dataset_main $config_name -}} -{{- $response_class_main = (index $.results_for_dataset_main $config_name).response_selection -}} -{{- end -}} -{{- $response_class := $config_data.response_selection -}} -{{ template "print_result_nlu" (dict "branch" $response_class "main" $response_class_main) }}| -{{- else -}} -`no data`| -{{- end }} -{{end}} -{{- end}} -{{- end}} -{{- end -}} -{{- end -}} - -{{- define "print_table_core" -}} -{{- $available_types := (index .results_for_dataset | jsonpath `@..type`) -}} -{{- if isKind "string" $available_types }}{{- $available_types = (index .results_for_dataset | jsonpath `@..type` | slice) -}}{{- end -}} -{{- if has $available_types "core" -}} -| Dialog Policy Configuration | Action Level Micro Avg. 
F1 | Conversation Level Accuracy | Run Time Train | Run Time Test | -|---------------|-----------------|-----------------|-------------------|-------------------| -{{ range $config_name, $config_data_array := .results_for_dataset -}} -{{ range $config_data := $config_data_array }} -{{- if eq $config_data.type "core" -}} -| `{{ $config_name }}` | -{{- if has $config_data "story_prediction" -}} -{{- $story_prediction_main := dict -}} -{{- if has $.results_for_dataset_main $config_name -}} -{{- $story_prediction_main = (index $.results_for_dataset_main $config_name).story_prediction -}} -{{- end -}} -{{- $story_prediction := $config_data.story_prediction -}} -{{ template "print_result_core_micro_avg" (dict "branch" $story_prediction "main" $story_prediction_main) }}| -{{- else -}} -`no data`| -{{- end -}} -{{- if has $config_data "story_prediction" -}} -{{- $story_prediction_main := dict -}} -{{- if has $.results_for_dataset_main $config_name -}} -{{- $story_prediction_main = (index $.results_for_dataset_main $config_name).story_prediction -}} -{{- end -}} -{{- $story_prediction := index $config_data.story_prediction -}} -{{ template "print_result_core_conversation_accuracy" (dict "branch" $story_prediction "main" $story_prediction_main) }}| -{{- else -}} -`no data`| -{{- end -}} -`{{ $config_data.train_run_time }}`| `{{ $config_data.test_run_time }}`| -{{ end }} -{{- end}} -{{- end}} -{{- end -}} -{{- end -}} - -{{- $results_main := (datasource "results_main") -}} -{{ range $dataset, $results_for_dataset := (datasource "data")}} -{{ $results_for_dataset_main := (index $results_main $dataset) -}} -{{ $content_dicts := index $results_for_dataset (index (keys $results_for_dataset) 0) -}} -{{ $one_content_dict := index $content_dicts 0 -}} -{{- if ($one_content_dict).external_dataset_repository -}} -Dataset: `{{$dataset}}`, Dataset repository branch: `{{ ($one_content_dict).dataset_repository_branch }}` (external repository), commit: `{{ ($one_content_dict).dataset_commit }}` -Configuration repository branch: `{{ ($one_content_dict).config_repository_branch }}` -{{ else -}} -Dataset: `{{$dataset}}`, Dataset repository branch: `{{ ($one_content_dict).dataset_repository_branch }}`, commit: `{{ ($one_content_dict).dataset_commit }}` -{{ end -}} -{{ template "print_table_nlu" (dict "results_for_dataset" $results_for_dataset "results_for_dataset_main" $results_for_dataset_main) }} -{{ template "print_table_core" (dict "results_for_dataset" $results_for_dataset "results_for_dataset_main" $results_for_dataset_main) }} -{{- end }} \ No newline at end of file diff --git a/.github/tests/test_mr_publish_results.py b/.github/tests/test_mr_publish_results.py index 4408a752e7fb..c1c05cdd517e 100644 --- a/.github/tests/test_mr_publish_results.py +++ b/.github/tests/test_mr_publish_results.py @@ -9,16 +9,24 @@ prepare_ml_metrics, transform_to_seconds, generate_json, + prepare_datadog_tags, ) EXAMPLE_CONFIG = "Sparse + BERT + DIET(seq) + ResponseSelector(t2t)" -EXAMPLE_DATASET = "financial-demo" +EXAMPLE_DATASET_NAME = "financial-demo" ENV_VARS = { + "BRANCH": "my-branch", + "PR_ID": "10927", + "PR_URL": "https://github.com/RasaHQ/rasa/pull/10856/", + "GITHUB_EVENT_NAME": "pull_request", + "GITHUB_RUN_ID": "1882718340", + "GITHUB_SHA": "abc", + "GITHUB_WORKFLOW": "CI - Model Regression", "IS_EXTERNAL": "false", "DATASET_REPOSITORY_BRANCH": "main", "CONFIG": EXAMPLE_CONFIG, - "DATASET": EXAMPLE_DATASET, + "DATASET_NAME": EXAMPLE_DATASET_NAME, "CONFIG_REPOSITORY_BRANCH": "main", "DATASET_COMMIT": 
"52a3ad3eb5292d56542687e23b06703431f15ead", "ACCELERATOR_TYPE": "CPU", @@ -27,6 +35,7 @@ "TOTAL_RUN_TIME": "5m58s", "TYPE": "nlu", "INDEX_REPETITION": "0", + "HOST_NAME": "github-runner-2223039222-22df222fcd-2cn7d", } @@ -34,9 +43,9 @@ def test_generate_json(): f = Path(__file__).parent / "test_data" / "intent_report.json" result = generate_json(f, task="intent_classification", data={}) - assert isinstance(result[EXAMPLE_DATASET][EXAMPLE_CONFIG], list) + assert isinstance(result[EXAMPLE_DATASET_NAME][EXAMPLE_CONFIG], list) - actual = result[EXAMPLE_DATASET][EXAMPLE_CONFIG][0]["intent_classification"] + actual = result[EXAMPLE_DATASET_NAME][EXAMPLE_CONFIG][0]["intent_classification"] expected = { "accuracy": 1.0, "weighted avg": { @@ -115,3 +124,9 @@ def test_prepare_ml_model_perf_metrics_simple(): key, value = "Intent Classification.weighted avg.f1-score", 1.0 assert key in metrics_ml and value == metrics_ml[key] + + +@mock.patch.dict(os.environ, ENV_VARS, clear=True) +def test_prepare_datadog_tags(): + tags_list = prepare_datadog_tags() + assert "dataset:financial-demo" in tags_list diff --git a/.github/workflows/ci-model-regression-on-schedule.yml b/.github/workflows/ci-model-regression-on-schedule.yml index 59cd55af13aa..2fd75972fa7a 100644 --- a/.github/workflows/ci-model-regression-on-schedule.yml +++ b/.github/workflows/ci-model-regression-on-schedule.yml @@ -12,6 +12,7 @@ env: TF_FORCE_GPU_ALLOW_GROWTH: true GITHUB_ISSUE_LABELS: '["type:bug :bug:", "tool:model-regression-tests"]' PERFORMANCE_DROP_THRESHOLD: -0.05 + NVML_INTERVAL_IN_SEC: 1 jobs: read_test_configuration: @@ -77,7 +78,7 @@ jobs: GH_RUNNER_IMAGE_TAG=$(jq -r 'if (.config | any(.TF == "${{ env.TF_VERSION }}" )) then (.config[] | select(.TF == "${{ env.TF_VERSION }}") | .IMAGE_TAG) else .default_image_tag end' .github/configs/tf-cuda.json) echo "GitHub runner image tag for TensorFlow ${{ env.TF_VERSION }} is ${GH_RUNNER_IMAGE_TAG}" echo GH_RUNNER_IMAGE_TAG=$GH_RUNNER_IMAGE_TAG >> $GITHUB_ENV - + num_max_replicas=3 matrix_length=${{ needs.read_test_configuration.outputs.matrix_length }} if [[ $matrix_length -gt $num_max_replicas ]]; then @@ -87,6 +88,23 @@ jobs: fi echo NUM_REPLICAS=$NUM_REPLICAS >> $GITHUB_ENV + - name: Send warning if the current TF version does not have CUDA image tags configured + if: env.GH_RUNNER_IMAGE_TAG == 'latest' + env: + TF_CUDA_FILE: ./github/config/tf-cuda.json + run: |- + echo "::warning file=${TF_CUDA_FILE},line=3,col=1,endColumn=3::Missing cuda config for tf ${{ env.TF_VERSION }}. If you are not sure how to config CUDA, please reach out to infrastructure." 
+ + - name: Notify slack on tf-cuda config updates + if: env.GH_RUNNER_IMAGE_TAG == 'latest' + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + uses: voxmedia/github-action-slack-notify-build@212e9f7a9ca33368c8dd879d6053972128258985 + with: + channel_id: ${{ secrets.SLACK_ALERTS_CHANNEL_ID }} + status: WARNING + color: warning + - name: Render deployment template run: |- export GH_RUNNER_IMAGE_TAG=${{ env.GH_RUNNER_IMAGE_TAG }} @@ -119,6 +137,7 @@ jobs: status: FAILED color: danger + model_regression_test_gpu: name: Model Regression Tests - GPU continue-on-error: true @@ -147,7 +166,7 @@ jobs: path: 'dataset' ref: "main" - - name: Set DATASET and CONFIG variables + - name: Set env variables id: set_dataset_config_vars env: DATASET_NAME: "${{ matrix.dataset }}" @@ -212,6 +231,9 @@ jobs: echo "TEST_DIR=${TEST_DIR}" >> $GITHUB_ENV fi + HOST_NAME=`hostname` + echo "HOST_NAME=${HOST_NAME}" >> $GITHUB_ENV + - name: Checkout dataset - external uses: actions/checkout@v2 if: steps.set_dataset_config_vars.outputs.is_external == 'true' @@ -232,15 +254,16 @@ jobs: - name: Start Datadog Agent if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' env: - DATASET: "${{ matrix.dataset }}" + DATASET_NAME: "${{ matrix.dataset }}" CONFIG: "${{ matrix.config }}" DATASET_COMMIT: "${{ steps.set-dataset-commit.outputs.dataset_commit }}" BRANCH: ${{ github.ref }} GITHUB_SHA: "${{ github.sha }}" TYPE: "${{ matrix.type }}" DATASET_REPOSITORY_BRANCH: "main" + INDEX_REPETITION: "${{ matrix.index_repetition }}" run: | - .github/scripts/start_dd_agent.sh "${{ secrets.DD_API_KEY }}" "${{ env.ACCELERATOR_TYPE }}" + .github/scripts/start_dd_agent.sh "${{ secrets.DD_API_KEY }}" "${{ env.ACCELERATOR_TYPE }}" ${{ env.NVML_INTERVAL_IN_SEC }} - name: Set up Python 3.8 🐍 uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a @@ -324,11 +347,10 @@ jobs: echo "::set-output name=total_run_time::$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" fi - - name: Generate a JSON file with a report / Publish results to Segment + Datadog + - name: Generate a JSON file with a report / Publish results to Datadog if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' env: SUMMARY_FILE: "./report.json" - SEGMENT_TOKEN: ${{ secrets.SEGMENT_TOKEN }} DATASET_NAME: ${{ matrix.dataset }} RESULT_DIR: "${{ github.workspace }}/results" CONFIG: ${{ matrix.config }} @@ -427,7 +449,7 @@ jobs: return issue.data.number - name: Notify Slack of Failure 😱 - if: failure() && steps.issue-exists.outputs.result == 'false' && github.event_name == 'schedule' + if: failure() && steps.issue-exists.outputs.result == 'false' && github.event_name == 'schedule' uses: 8398a7/action-slack@a74b761b4089b5d730d813fbedcd2ec5d394f3af # v3 with: status: custom @@ -524,16 +546,16 @@ jobs: # Get ID of last on-schedule workflow SCHEDULE_ID=$(curl -X GET -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" \ "https://api.github.com/repos/${{ github.repository }}/actions/workflows" \ - | jq -r '.workflows[] | select(.name == "${{ github.workflow }}") | select(.path | test("schedule")) | .id') + | jq -r '.workflows[] | select(.name == "${{ github.workflow }}") | select(.path | test("schedule")) | .id') - ARTIFACT_URL=$(curl -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H 
"Accept: application/vnd.github.v3+json" \ + ARTIFACT_URL=$(curl -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" \ "https://api.github.com/repos/${{ github.repository }}/actions/workflows/${SCHEDULE_ID}/runs?event=schedule&status=completed&branch=main&per_page=1" | jq -r .workflow_runs[0].artifacts_url) DOWNLOAD_URL=$(curl -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" "${ARTIFACT_URL}" \ - | jq -r '.artifacts[] | select(.name="report.json") | .archive_download_url') + | jq -r '.artifacts[] | select(.name == "report.json") | .archive_download_url') # Download the artifact - curl -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -LJO -H "Accept: application/vnd.github.v3+json" $DOWNLOAD_URL + curl -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -LJO -H "Accept: application/vnd.github.v3+json" $DOWNLOAD_URL # Unzip and change name unzip report.json.zip && mv report.json report_main.json @@ -551,14 +573,7 @@ jobs: - name: Analyse Performance 🔍 id: performance run: | - set +e OUTPUT="$(gomplate -d data=report.json -d results_main=report_main.json -f .github/templates/model_regression_test_results.tmpl)" - if [ $? -ne 0 ]; then - echo "New template failed. Try with legacy template." - OUTPUT="$(gomplate -d data=report.json -d results_main=report_main.json -f .github/templates/model_regression_test_results_legacy.tmpl)" - fi - set -e - OUTPUT="${OUTPUT//$'\n'/'%0A'}" OUTPUT="${OUTPUT//$'\r'/'%0D'}" OUTPUT="$(echo $OUTPUT | sed 's|`|\\`|g')" diff --git a/.github/workflows/ci-model-regression.yml b/.github/workflows/ci-model-regression.yml index b2241c74e48b..3cd7528e8f49 100644 --- a/.github/workflows/ci-model-regression.yml +++ b/.github/workflows/ci-model-regression.yml @@ -21,6 +21,7 @@ env: GCLOUD_VERSION: "318.0.0" DD_PROFILING_ENABLED: false TF_FORCE_GPU_ALLOW_GROWTH: true + NVML_INTERVAL_IN_SEC: 1 jobs: read_test_configuration: @@ -185,7 +186,7 @@ jobs: GH_RUNNER_IMAGE_TAG=$(jq -r 'if (.config | any(.TF == "${{ env.TF_VERSION }}" )) then (.config[] | select(.TF == "${{ env.TF_VERSION }}") | .IMAGE_TAG) else .default_image_tag end' .github/configs/tf-cuda.json) echo "GitHub runner image tag for TensorFlow ${{ env.TF_VERSION }} is ${GH_RUNNER_IMAGE_TAG}" echo GH_RUNNER_IMAGE_TAG=$GH_RUNNER_IMAGE_TAG >> $GITHUB_ENV - + num_max_replicas=3 matrix_length=${{ needs.read_test_configuration.outputs.matrix_length }} if [[ $matrix_length -gt $num_max_replicas ]]; then @@ -195,6 +196,23 @@ jobs: fi echo NUM_REPLICAS=$NUM_REPLICAS >> $GITHUB_ENV + - name: Send warning if the current TF version does not have CUDA image tags configured + if: env.GH_RUNNER_IMAGE_TAG == 'latest' + env: + TF_CUDA_FILE: ./github/config/tf-cuda.json + run: |- + echo "::warning file=${TF_CUDA_FILE},line=3,col=1,endColumn=3::Missing cuda config for tf ${{ env.TF_VERSION }}. If you are not sure how to config CUDA, please reach out to infrastructure." 
+ + - name: Notify slack on tf-cuda config updates + if: env.GH_RUNNER_IMAGE_TAG == 'latest' + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + uses: voxmedia/github-action-slack-notify-build@212e9f7a9ca33368c8dd879d6053972128258985 + with: + channel_id: ${{ secrets.SLACK_ALERTS_CHANNEL_ID }} + status: WARNING + color: warning + - name: Render deployment template run: |- export GH_RUNNER_IMAGE_TAG=${{ env.GH_RUNNER_IMAGE_TAG }} @@ -309,6 +327,9 @@ jobs: echo "TEST_DIR=${TEST_DIR}" >> $GITHUB_ENV fi + HOST_NAME=`hostname` + echo "HOST_NAME=${HOST_NAME}" >> $GITHUB_ENV + - name: Checkout dataset - external uses: actions/checkout@v2 if: steps.set_dataset_config_vars.outputs.is_external == 'true' @@ -329,7 +350,7 @@ jobs: - name: Start Datadog Agent if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' env: - DATASET: "${{ matrix.dataset }}" + DATASET_NAME: "${{ matrix.dataset }}" CONFIG: "${{ matrix.config }}" DATASET_COMMIT: "${{ steps.set-dataset-commit.outputs.dataset_commit }}" BRANCH: ${{ github.head_ref }} @@ -337,9 +358,10 @@ jobs: PR_ID: "${{ github.event.number }}" TYPE: "${{ matrix.type }}" DATASET_REPOSITORY_BRANCH: ${{ needs.read_test_configuration.outputs.dataset_branch }} + INDEX_REPETITION: "${{ matrix.index_repetition }}" run: | export PR_URL="https://github.com/${GITHUB_REPOSITORY}/pull/${{ github.event.number }}" - .github/scripts/start_dd_agent.sh "${{ secrets.DD_API_KEY }}" "${{ env.ACCELERATOR_TYPE }}" + .github/scripts/start_dd_agent.sh "${{ secrets.DD_API_KEY }}" "${{ env.ACCELERATOR_TYPE }}" ${{ env.NVML_INTERVAL_IN_SEC }} - name: Set up Python 3.8 🐍 uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a @@ -423,11 +445,10 @@ jobs: echo "::set-output name=total_run_time::$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" fi - - name: Generate a JSON file with a report / Publish results to Segment + Datadog + - name: Generate a JSON file with a report / Publish results to Datadog if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' env: SUMMARY_FILE: "./report.json" - SEGMENT_TOKEN: ${{ secrets.SEGMENT_TOKEN }} DATASET_NAME: ${{ matrix.dataset }} RESULT_DIR: "${{ github.workspace }}/results" CONFIG: ${{ matrix.config }} @@ -492,7 +513,7 @@ jobs: sudo curl -o /usr/local/bin/gomplate -sSL https://github.com/hairyhenderson/gomplate/releases/download/v3.9.0/gomplate_linux-amd64 sudo chmod +x /usr/local/bin/gomplate - - name: Set DATASET and CONFIG variables + - name: Set env variables id: set_dataset_config_vars env: DATASET_NAME: "${{ matrix.dataset }}" @@ -551,6 +572,9 @@ jobs: echo "TEST_DIR=${TEST_DIR}" >> $GITHUB_ENV fi + HOST_NAME=`hostname` + echo "HOST_NAME=${HOST_NAME}" >> $GITHUB_ENV + - name: Checkout dataset - external uses: actions/checkout@v2 if: steps.set_dataset_config_vars.outputs.is_external == 'true' @@ -571,7 +595,7 @@ jobs: - name: Start Datadog Agent if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' env: - DATASET: "${{ matrix.dataset }}" + DATASET_NAME: "${{ matrix.dataset }}" CONFIG: "${{ matrix.config }}" DATASET_COMMIT: "${{ steps.set-dataset-commit.outputs.dataset_commit }}" BRANCH: ${{ github.head_ref }} @@ -579,9 +603,10 @@ jobs: PR_ID: "${{ github.event.number }}" TYPE: "${{ matrix.type }}" 
DATASET_REPOSITORY_BRANCH: ${{ matrix.dataset_branch }} + INDEX_REPETITION: "${{ matrix.index_repetition }}" run: | export PR_URL="https://github.com/${GITHUB_REPOSITORY}/pull/${{ github.event.number }}" - .github/scripts/start_dd_agent.sh "${{ secrets.DD_API_KEY }}" "${{ env.ACCELERATOR_TYPE }}" + .github/scripts/start_dd_agent.sh "${{ secrets.DD_API_KEY }}" "${{ env.ACCELERATOR_TYPE }}" ${{ env.NVML_INTERVAL_IN_SEC }} - name: Set up Python 3.8 🐍 uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a @@ -665,11 +690,10 @@ jobs: echo "::set-output name=total_run_time::$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" fi - - name: Generate a JSON file with a report / Publish results to Segment + Datadog + - name: Generate a JSON file with a report / Publish results to Datadog if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' env: SUMMARY_FILE: "./report.json" - SEGMENT_TOKEN: ${{ secrets.SEGMENT_TOKEN }} DATASET_NAME: ${{ matrix.dataset }} RESULT_DIR: "${{ github.workspace }}/results" CONFIG: ${{ matrix.config }} @@ -794,16 +818,16 @@ jobs: # Get ID of last on-schedule workflow SCHEDULE_ID=$(curl -X GET -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" \ "https://api.github.com/repos/${{ github.repository }}/actions/workflows" \ - | jq -r '.workflows[] | select(.name == "CI - Model Regression on schedule") | select(.path | test("schedule")) | .id') + | jq -r '.workflows[] | select(.name == "CI - Model Regression on schedule") | select(.path | test("schedule")) | .id') - ARTIFACT_URL=$(curl -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" \ + ARTIFACT_URL=$(curl -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" \ "https://api.github.com/repos/${{ github.repository }}/actions/workflows/${SCHEDULE_ID}/runs?event=schedule&status=completed&branch=main&per_page=1" | jq -r .workflow_runs[0].artifacts_url) DOWNLOAD_URL=$(curl -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" "${ARTIFACT_URL}" \ - | jq -r '.artifacts[] | select(.name="report.json") | .archive_download_url') + | jq -r '.artifacts[] | select(.name == "report.json") | .archive_download_url') # Download the artifact - curl -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -LJO -H "Accept: application/vnd.github.v3+json" $DOWNLOAD_URL + curl -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -LJO -H "Accept: application/vnd.github.v3+json" $DOWNLOAD_URL # Unzip and change name unzip report.json.zip && mv report.json report_main.json @@ -821,17 +845,22 @@ jobs: - name: Render a comment to add id: get_results run: | - set +e OUTPUT="$(gomplate -d data=report.json -d results_main=report_main.json -f .github/templates/model_regression_test_results.tmpl)" - if [ $? -ne 0 ]; then - echo "New template failed. Try with legacy template." 
- OUTPUT="$(gomplate -d data=report.json -d results_main=report_main.json -f .github/templates/model_regression_test_results_legacy.tmpl)" - fi - set -e OUTPUT="${OUTPUT//$'\n'/'%0A'}" OUTPUT="${OUTPUT//$'\r'/'%0D'}" echo "::set-output name=result::$OUTPUT" + # Get time of current commit as start time + TIME_ISO_COMMIT=$(gomplate -d github=https://api.github.com/repos/rasaHQ/rasa/commits/${{ github.sha }} -H 'github=Authorization:token ${{ secrets.GITHUB_TOKEN }}' -i '{{ (ds "github").commit.author.date }}') # Example "2022-02-17T14:06:38Z" + TIME_UNIX_COMMIT=$(date -d "${TIME_ISO_COMMIT}" +%s%3N) # Example: "1645106798" + + # Get current time + TIME_ISO_NOW=$(gomplate -i '{{ (time.Now).UTC.Format time.RFC3339}}') # Example: "2022-02-17T14:50:54Z%" + TIME_UNIX_NOW=$(date -d "${TIME_ISO_NOW}" +%s%3N) # Example: "1645118083" + + echo "::set-output name=from_ts::$TIME_UNIX_COMMIT" + echo "::set-output name=to_ts::$TIME_UNIX_NOW" + - name: Publish results as a PR comment uses: marocchino/sticky-pull-request-comment@v2.2.0 if: ${{ always() }} @@ -844,6 +873,8 @@ jobs: Commit: ${{ github.sha }}, [The full report is available as an artifact.](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) + [Datadog dashboard link](https://app.datadoghq.eu/dashboard/mf4-2hu-x84?tpl_var_branch_baseline=${{ github.head_ref }}&from_ts=${{ steps.get_results.outputs.from_ts }}&to_ts=${{ steps.get_results.outputs.to_ts }}&live=false) + ${{ steps.get_results.outputs.result }} - name: Remove 'status:model-regression-tests' label diff --git a/CHANGELOG.mdx b/CHANGELOG.mdx index 1cc633dfc117..c1f281f770c5 100644 --- a/CHANGELOG.mdx +++ b/CHANGELOG.mdx @@ -3051,7 +3051,7 @@ https://github.com/RasaHQ/rasa/tree/main/changelog/ . --> `1.8.0`. Since version `1.8.0` the Rasa SDK Docker images does not longer run as `root` user by default. For commands which require `root` user usage, you have to switch back to the `root` user in your Docker image as described in - Building an Action Server Image. + [Building an Action Server Image](https://rasa.com/docs/action-server/deploy-action-server#building-an-action-server-image). * [#5402](https://github.com/RasaHQ/rasa/issues/5402): Made improvements to Building Assistants tutorial diff --git a/changelog/10798.doc.md b/changelog/10798.doc.md new file mode 100644 index 000000000000..8fe93086e740 --- /dev/null +++ b/changelog/10798.doc.md @@ -0,0 +1,2 @@ +Updated the `model_confidence` parameter in `TEDPolicy` and `DIETClassifier`. The `linear_norm` is removed +as it is no longer supported. \ No newline at end of file diff --git a/changelog/10807.misc.md b/changelog/10807.misc.md new file mode 100644 index 000000000000..2d6f7e3c58c8 --- /dev/null +++ b/changelog/10807.misc.md @@ -0,0 +1,6 @@ +Domain loading was improved in certain areas: +- unified the different merging methods in order to use a single method in the case of both loading a list of paths and domain directory. 
+- extracted several general utility methods used in merging to `rasa/shared/utils/common.py`
+- removed the `self.duplicates` attribute stored in the `Domain` instance; a warning is now raised instead when duplicates are found during merging
+- removed the `Validator.verify_domain_duplicates` method, which made use of the removed `self.duplicates` attribute
+- serialisation of `Domain` objects now uses the initial dictionary representation (rather than the re-transformed version used before).
diff --git a/changelog/10925.improvement.md b/changelog/10925.improvement.md
new file mode 100644
index 000000000000..65116591313e
--- /dev/null
+++ b/changelog/10925.improvement.md
@@ -0,0 +1 @@
+Add verification of loops caused by checkpoints to `rasa data validate`.
\ No newline at end of file
diff --git a/changelog/10940.doc.md b/changelog/10940.doc.md
new file mode 100644
index 000000000000..d8c4b5f788d8
--- /dev/null
+++ b/changelog/10940.doc.md
@@ -0,0 +1,2 @@
+Added an additional step to the `Receiving Messages` section of the slack.mdx documentation. After a Slack update, this
+additional step is needed to allow direct messages to the bot.
diff --git a/changelog/10957.doc.md b/changelog/10957.doc.md
new file mode 100644
index 000000000000..d6e92c3a8f87
--- /dev/null
+++ b/changelog/10957.doc.md
@@ -0,0 +1 @@
+Backport the updated deployment docs to 3.0.x.
diff --git a/changelog/9094.misc.md b/changelog/9094.misc.md
new file mode 100644
index 000000000000..10f29fcacc3c
--- /dev/null
+++ b/changelog/9094.misc.md
@@ -0,0 +1 @@
+Enable `mypy` `var-annotated` check and fix any resulting errors.
diff --git a/changelog/9096.misc.md b/changelog/9096.misc.md
new file mode 100644
index 000000000000..96e2d2fe90f5
--- /dev/null
+++ b/changelog/9096.misc.md
@@ -0,0 +1 @@
+Enable `mypy` `union-attr` check and fix any resulting errors.
diff --git a/changelog/9098.misc.md b/changelog/9098.misc.md
new file mode 100644
index 000000000000..0ed1e4791401
--- /dev/null
+++ b/changelog/9098.misc.md
@@ -0,0 +1 @@
+Enable `mypy` `attr-defined` check and fix any resulting errors.
diff --git a/changelog/_template.md.jinja2 b/changelog/_template.md.jinja2
index 1369ad3fba7a..a96ea9aacf48 100644
--- a/changelog/_template.md.jinja2
+++ b/changelog/_template.md.jinja2
@@ -1,5 +1,5 @@
 {# Based on https://github.com/hawkowl/towncrier/blob/master/src/towncrier/templates/default.rst #}
-{% for section in sections %}{% if section %}{{section}}{% endif %}{% if sections[section] %}{% for category, val in definitions.items() if category in sections[section] %}
+{% if top_line %}{{ top_line }} {{ top_underline * ((top_line)|length)}} {% elif versiondata.name %}{{ versiondata.name }} {{ versiondata.version }} ({{ versiondata.date }}) {{ top_underline * ((versiondata.name + versiondata.version + versiondata.date)|length + 4)}}{% else %}{{ versiondata.version }} ({{ versiondata.date }}) {{ top_underline * ((versiondata.version + versiondata.date)|length + 3)}}{% endif %}{% for section in sections %}{% if section %}{{section}}{% endif %}{% if sections[section] %}{% for category, val in definitions.items() if category in sections[section] %}
 {{ "### " + definitions[category]['name'] }}
 
 {% if definitions[category]['showcontent'] %}{% for text, values in sections[section][category]|dictsort(by='value') %}{% set issue_joiner = joiner(', ') %}- {% for value in values|sort %}{{ issue_joiner() }}{{ value }}{% endfor %}: {{ text }}
@@ -7,4 +7,4 @@
 {% else %}{% endif %}{% endfor %}{% else %}
 No significant changes.
-{% endif %}{% endfor %}
\ No newline at end of file
+{% endif %}{% endfor %}
diff --git a/data/test_domains/duplicate_entities.yml b/data/test_domains/duplicate_entities.yml
index 2199cefb1791..c3fcf1f7bfca 100644
--- a/data/test_domains/duplicate_entities.yml
+++ b/data/test_domains/duplicate_entities.yml
@@ -2,8 +2,6 @@ intents:
   - greet
   - default
   - goodbye
-  - default
-  - goodbye
 
 slots:
   cuisine:
diff --git a/docs/docs/components.mdx b/docs/docs/components.mdx
index dd87fb1cee98..aab758b6c06d 100644
--- a/docs/docs/components.mdx
+++ b/docs/docs/components.mdx
@@ -1394,12 +1394,10 @@ More details on the parameters can be found on the [scikit-learn documentation p
   This should help in better generalization of the model to real world test sets.
 
 * `model_confidence`:
-  This parameter allows the user to configure how confidences are computed during inference. It can take two values:
-    * `softmax`: Confidences are in the range `[0, 1]` (old behavior and current default). Computed similarities are normalized with the `softmax` activation function.
-    * `linear_norm`: Confidences are in the range `[0, 1]`. Computed dot product similarities are normalized with a linear function.
-
-  Please try using `linear_norm` as the value for `model_confidence`. This should make it easier to tune fallback thresholds for the [FallbackClassifier](./components.mdx#fallbackclassifier).
-
+  This parameter allows the user to configure how confidences are computed during inference. It accepts only
+  one value, `softmax`: confidences are in the range `[0, 1]`, and the computed
+  similarities are normalized with the `softmax` activation function.
+
 The above configuration parameters are the ones you should configure to fit your model to your data.
 However, additional parameters exist that can be adapted.
 
@@ -2674,12 +2672,9 @@ Selectors predict a bot response from a set of candidate responses.
   This should help in better generalization of the model to real world test sets.
 
 * `model_confidence`:
-  This parameter allows the user to configure how confidences are computed during inference. It can take two values:
-    * `softmax`: Confidences are in the range `[0, 1]` (old behavior and current default). Computed similarities are normalized with the `softmax` activation function.
-    * `linear_norm`: Confidences are in the range `[0, 1]`. Computed dot product similarities are normalized with a linear function.
-
-  Please try using `linear_norm` as the value for `model_confidence`. This should make it easier to tune fallback thresholds for the [FallbackClassifier](./components.mdx#fallbackclassifier).
-
+  This parameter allows the user to configure how confidences are computed during inference. It accepts only
+  one value, `softmax`: confidences are in the range `[0, 1]`, and the computed
+  similarities are normalized with the `softmax` activation function.
 
 The component can also be configured to train a response selector for a particular retrieval intent.
 The parameter `retrieval_intent` sets the name of the retrieval intent for which this response selector model is trained.
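Since `softmax` is now the only supported value for `model_confidence`, a small NumPy sketch may help readers of this doc change see how similarities become confidences (a simplified illustration, not the actual DIET/TED implementation; the function name is ours):

```python
import numpy as np

def softmax_confidences(similarities: np.ndarray) -> np.ndarray:
    """Normalize raw similarity scores into [0, 1] confidences that sum to 1."""
    shifted = similarities - similarities.max()  # shift for numerical stability
    exp = np.exp(shifted)
    return exp / exp.sum()

# Three candidate labels with raw similarities 2.0, 1.0 and 0.1:
print(softmax_confidences(np.array([2.0, 1.0, 0.1])))  # ~[0.66 0.24 0.10]
```

The key property is that the scores are normalized relative to each other, so a single confidence is only meaningful in the context of the other candidates.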
diff --git a/docs/docs/connectors/img/slack-app-home.png b/docs/docs/connectors/img/slack-app-home.png
new file mode 100644
index 000000000000..c42e39996b7b
Binary files /dev/null and b/docs/docs/connectors/img/slack-app-home.png differ
diff --git a/docs/docs/connectors/slack.mdx b/docs/docs/connectors/slack.mdx
index 472b645e00c8..646766a1285d 100644
--- a/docs/docs/connectors/slack.mdx
+++ b/docs/docs/connectors/slack.mdx
@@ -11,6 +11,7 @@ import interactivityImg from './img/slack-interactivity.png';
 import requestUrlImg from './img/slack-request-url.png';
 import scopesImg from './img/slack-scopes.png';
 import secretImg from './img/slack-secret.png';
+import appHomeImg from './img/slack-app-home.png';
 
 Connecting a bot to Slack requires you to configure it to send messages (using
 API credentials) and to receive messages (using a webhook).
@@ -110,7 +111,15 @@ your bot and tell you about new messages. If you are running locally, you can
    If you are running locally, make sure ngrok (or another tool to retrieve a public
    url) is running as well.
 
-2. Configure the webhook by heading to **Event Subscriptions** and
+2. To send messages directly to your bot using the Slack UI, head to **App Home**,
+   scroll to the bottom and select the checkbox for
+   `Allow users to send Slash commands and messages from the messages tab.`
+
+   You might have to quit the Slack app and re-open it before your changes take effect.
+
+   Allow users to send Slash commands and messages from the messages tab
+
+3. Configure the webhook by heading to **Event Subscriptions** and
    turning **Enable Events** on.
 
    As a request URL enter the public url of your bot and append
   `/webhooks/slack/webhook`, e.g.
@@ -121,7 +130,7 @@
 
    Request URL Screenshot
 
-3. As a last step, you'll need to **Subscribe to bot events** on the same page.
+4. As a last step, you'll need to **Subscribe to bot events** on the same page.
    You'll need to add the following events:
    - `message.channels`,
    - `message.groups`,
diff --git a/docs/docs/deploy/deploy-action-server.mdx b/docs/docs/deploy/deploy-action-server.mdx
index 9363d34fa8bb..fbe94a4ae916 100644
--- a/docs/docs/deploy/deploy-action-server.mdx
+++ b/docs/docs/deploy/deploy-action-server.mdx
@@ -1,16 +1,16 @@
 ---
 id: deploy-action-server
-sidebar_label: "Step 2: Deploy Rasa Action Server"
-title: "Step 2: Deploy Action Server"
-description: The second step to deploy your Rasa assistant
+sidebar_label: "Deploy Action Server"
+title: "Deploy Action Server"
+description: Deploy and connect to your custom action server
 abstract: This page shows you where to find how to deploy Rasa Action Server and how to build a custom Docker image.
 ---
 
 import variables from './../variables.json';
 
-## a. Deploy Rasa Action Server
+## a. Deploy Action Server
 
-Visit the [Installation Guide for Rasa Action Server](https://rasa.com/docs/action-server/deploy-action-server#a-installation) to learn how to deploy an Action Server using helm and how to [build an Action Server image](https://rasa.com/docs/action-server/deploy-action-server#building-an-action-server-image) that can be used along with your deployment.
+Visit the [Rasa Action Server docs](https://rasa.com/docs/action-server/deploy-action-server#a-installation) to learn how to [build an Action Server image](https://rasa.com/docs/action-server/deploy-action-server#building-an-action-server-image) and how to deploy an Action Server using Helm.
 
 ## b.
Connect Rasa Action Server with Rasa Open Source deployment
diff --git a/docs/docs/deploy/deploy-rasa-x.mdx b/docs/docs/deploy/deploy-rasa-x.mdx
index 86e7209e4c68..71d23610eb17 100644
--- a/docs/docs/deploy/deploy-rasa-x.mdx
+++ b/docs/docs/deploy/deploy-rasa-x.mdx
@@ -1,9 +1,9 @@
 ---
 id: deploy-rasa-x
-sidebar_label: "Step 3: Deploy Rasa X"
-title: "Step 3: Deploy Rasa X"
-description: "Deploying your Rasa assistant: Step 3"
-abstract: This page shows you where to find out how to deploy Rasa X and forward events to it from your Rasa Open Source deployment. The third and final step in deploying your Rasa assistant.
+sidebar_label: "Deploy Rasa X"
+title: "Deploy Rasa X"
+description: "Deploying Rasa X to improve your Rasa assistant"
+abstract: This page shows you where to find out how to deploy Rasa X and forward events to it from your Rasa Open Source deployment.
 ---
 
 import variables from './../variables.json';
diff --git a/docs/docs/deploy/deploy-rasa.mdx b/docs/docs/deploy/deploy-rasa.mdx
index 6f2c407331f1..ce217a9a4c0f 100644
--- a/docs/docs/deploy/deploy-rasa.mdx
+++ b/docs/docs/deploy/deploy-rasa.mdx
@@ -1,9 +1,9 @@
 ---
 id: deploy-rasa
-sidebar_label: "Step 1: Deploy Rasa Open Source"
-title: "Step 1: Deploy Rasa Open Source"
-description: The first of three steps to deploy your Rasa assistant
-abstract: This page explains how to deploy Rasa Open Source using Helm. The first of three steps in deploying your Rasa assistant.
+sidebar_label: "Deploy Rasa Open Source"
+title: "Deploy Rasa Open Source"
+description: Deploy a Rasa assistant on Kubernetes/Openshift using Helm
+abstract: This page explains how to deploy Rasa Open Source using Helm.
 ---
 
 import variables from './../variables.json';
@@ -231,8 +231,8 @@
 
 ### 5. Access Rasa Open Source Assistant
 
-By default the Rasa deployment is exposed via the `rasa` service and accessible only within a Kubernetes cluster. You can get
-the IP address using this command:
+By default the Rasa deployment is exposed via the `rasa` service and accessible only within a Kubernetes cluster.
+To access the Rasa Open Source assistant using `kubectl port-forward`, use these commands:
 
 
 
 You can then access the deployment on `http://127.0.0.1:${SERVICE_PORT}`
 
-Visit [the Rasa helm chart README](https://github.com/RasaHQ/helm-charts/tree/main/charts/rasa#exposing-the-rasa-deployment-to-the-public) to learn other ways to expose your deployment.
+The other option is to expose your deployment on `NodePort` and access it directly.
 
-## Next Steps
+1. Prepare a configuration that switches the rasa service to `NodePort`.
 
-- Visit [the Rasa helm chart repository](https://github.com/RasaHQ/helm-charts/tree/main/charts/rasa) where you can find examples of configuration
-- Visit [the Rasa X docs](https://rasa.com/docs/rasa-x/) and learn how to [integrate your Rasa Open Source deployment with Rasa X](https://rasa.com/docs/rasa-x/installation-and-setup/deploy#2-connect-rasa-open-source-deployment-the-rasa-helm-chart).
+   ```yaml
+   # rasa-values.yaml
+   service:
+     type: "NodePort"
+   ```
+
+2. Upgrade the deployment.
 
-## Alternative Deployment Methods
+   ```text
+   helm upgrade --namespace <namespace> --reuse-values -f rasa-values.yaml <release name> rasa/rasa
+   ```
 
-It is also possible to deploy a Rasa assistant using Rasa Ephemeral Installer, Docker or Docker Compose. Choose one of the alternatives methods listed below to see details.
+3.
Get the node port and address for the rasa service.
 
-* [Developing an assistant locally](../installation.mdx)
+   ```text
+   export NODE_PORT=$(kubectl get --namespace <namespace> -o jsonpath="{.spec.ports[0].nodePort}" services <release name>)
 
-* [Developing an assistant in a Docker container](../docker/building-in-docker.mdx)
+   $ curl http://127.0.0.1:${NODE_PORT}
+   Hello from Rasa: 2.8.7
+   ```
 
-* [Deploying an assistant using the Rasa Ephemeral Installer (REI)](../rei/deploy.mdx)
+Visit [the Rasa helm chart README](https://github.com/RasaHQ/helm-charts/tree/main/charts/rasa#exposing-the-rasa-deployment-to-the-public) to learn other ways to expose your deployment.
 
-* [Deploying an assistant with Docker Compose](../docker/deploying-in-docker-compose.mdx)
+## Next Steps
+
+- Visit [the Rasa helm chart repository](https://github.com/RasaHQ/helm-charts/tree/main/charts/rasa) where you can find examples of configuration
+- Visit [the Rasa X docs](https://rasa.com/docs/rasa-x/) and learn how to [integrate your Rasa Open Source deployment with Rasa X](https://rasa.com/docs/rasa-x/installation-and-setup/deploy#2-connect-rasa-open-source-deployment-the-rasa-helm-chart).
diff --git a/docs/docs/deploy/introduction.mdx b/docs/docs/deploy/introduction.mdx
index 7caf651cecab..0d170c6bf01f 100644
--- a/docs/docs/deploy/introduction.mdx
+++ b/docs/docs/deploy/introduction.mdx
@@ -1,7 +1,7 @@
 ---
 id: introduction
 sidebar_label: Introduction
-title: Deploying Your Rasa Assistant
+title: Deploying a Rasa Assistant
 description: How to deploy your Rasa Assistant with Kubernetes/Openshift
 abstract: This section explains when and how to deploy an assistant built with Rasa.
   It will allow you to make your assistant available to users and set you up with a production-ready environment.
@@ -9,45 +9,46 @@ abstract: This section explains when and how to deploy an assistant built with R
 
 import variables from './../variables.json';
 
+:::note
+Are you unfamiliar with Docker, Kubernetes and Helm? Check out "[Understanding Rasa Deployments](https://www.youtube.com/watch?v=aAs_RS0ueEw&list=PL75e0qA87dlHmfmu7oPPYA22fmc6GJ2aW)" on our [YouTube channel](https://www.youtube.com/channel/UCJ0V6493mLvqdiVwOKWBODQ).
+:::
+
 ## When to Deploy Your Assistant
 
 The best time to deploy your assistant and make it available to test users is once it can handle the most
-important happy paths or is what we call a [minimum viable assistant](../glossary.mdx).
+important happy paths or is what we call a [minimum viable assistant](../glossary.mdx). Then you can use incoming
+conversations to inform further development of your assistant.
 
-The recommended deployment method described in the Deploy Your Assistant section makes it easy to share your assistant
+Connecting your deployed assistant to Rasa X makes it easy to share your assistant
 with test users via the [share your assistant feature in Rasa X](https://rasa.com/docs/rasa-x/user-guide/share-assistant#share-your-bot).
 Then, when you're ready to make your assistant available via one or more [Messaging and Voice Channels](../messaging-and-voice-channels.mdx),
-you can easily add them to your existing deployment set up.
+you can add them to your existing deployment setup.
+See the [Rasa X Installation Guide](https://rasa.com/docs/rasa-x/installation-and-setup/installation-guide/) to learn how to deploy Rasa X and connect it to your Rasa Open Source deployment.
-## Recommended Deployment Method
-The [Rasa Open Source Helm chart](https://github.com/RasaHQ/helm-charts/tree/main/charts/rasa) is the production ready, recommended method to deploy
-your assistant. It enables you to connect your live assistant to a Rasa X or
-Rasa Enterprise deployment. See the [Alternative Deployment Methods](./deploy-rasa.mdx#alternative-deployment-methods)
-for details on building your bot locally, with Docker, Rasa Ephemeral Installer, or deploying with docker-compose.
-
-For more details on Rasa X deployment methods see the [Rasa X Installation Guide](https://rasa.com/docs/rasa-x/installation-and-setup/installation-guide/).
-
-The following instructions describe how to deploy a Rasa Open Source server
-by using the [Rasa Helm Chart](https://github.com/RasaHQ/helm-charts/tree/main/charts/rasa) in a scalable cluster environment using OpenShift or Kubernetes (K8S).
+## Recommended Deployment Method
+The [Rasa Helm chart](https://github.com/RasaHQ/helm-charts/tree/main/charts/rasa) is the production-ready method to deploy
+your assistant on a Kubernetes or OpenShift cluster. For details, see the [deployment instructions](./deploy-rasa.mdx).
 
 ### Cluster Requirements
 To install the Rasa Helm chart, you need an existing
 [Kubernetes cluster](https://kubernetes.io/) or [OpenShift cluster](https://www.openshift.com/).
-Setting up a Kubernetes / OpenShift cluster can be tedious, hence we
-recommend to get a managed cluster from a cloud provider like
-[Google Cloud](https://cloud.google.com/kubernetes-engine),
-[DigitalOcean](https://www.digitalocean.com/products/kubernetes/),
-[Microsoft Azure](https://azure.microsoft.com/en-us/services/kubernetes-service/), or
-[Amazon EKS](https://aws.amazon.com/eks/).
+If you don't have one yet, you can get a managed cluster from a cloud provider like:
+* [Google Cloud](https://cloud.google.com/kubernetes-engine),
+* [DigitalOcean](https://www.digitalocean.com/products/kubernetes/),
+* [Microsoft Azure](https://azure.microsoft.com/en-us/services/kubernetes-service/), or
+* [Amazon EKS](https://aws.amazon.com/eks/).
 
-:::note
-The Rasa Helm chart is open source and available in the
-[helm-charts repository](https://github.com/rasahq/helm-charts).
-Please
-[create an issue](https://github.com/RasaHQ/helm-charts/issues/new) in this
-repository if you discover bugs or have suggestions for improvements.
+If you are looking for a lightweight, non-production cluster that can run on a single machine, check out the instructions for using [Rasa Ephemeral Installer (REI)](../deploy/rei/deploy.mdx). REI will help you set up a local Kubernetes cluster on which you can deploy your assistant using the Rasa Helm chart.
-:::
+## Alternative Deployment Methods
+
+The following deployment methods are not suited to a production deployment, but can be useful for development and testing:
+
+* [Running an assistant locally on the command line](../command-line-interface.mdx#rasa-run)
+
+* [Developing an assistant in a Docker container](../docker/building-in-docker.mdx)
+
+* [Deploying an assistant with Docker Compose](../docker/deploying-in-docker-compose.mdx)
diff --git a/docs/docs/deploy/rei/deploy.mdx b/docs/docs/deploy/rei/deploy.mdx
new file mode 100644
index 000000000000..7743c90a1bd5
--- /dev/null
+++ b/docs/docs/deploy/rei/deploy.mdx
@@ -0,0 +1,50 @@
+---
+id: using-rei
+sidebar_label: Set up a Local Cluster using Rasa Ephemeral Installer (REI)
+title: Set up a Local Cluster using Rasa Ephemeral Installer (REI)
+description: Learn how to set up a local Kubernetes cluster using Rasa Ephemeral Installer (REI)
+---
+import useBaseUrl from '@docusaurus/useBaseUrl';
+
+
+import variables from '../../variables.json';
+
+:::note
+Are you unfamiliar with Docker, Kubernetes and Helm? Check out "[Understanding Rasa Deployments](https://www.youtube.com/watch?v=aAs_RS0ueEw&list=PL75e0qA87dlHmfmu7oPPYA22fmc6GJ2aW)" on our [YouTube channel](https://www.youtube.com/channel/UCJ0V6493mLvqdiVwOKWBODQ).
+:::
+
+If you would like to deploy a Rasa assistant using the [Rasa OSS Helm chart](https://github.com/RasaHQ/helm-charts/tree/main/charts/rasa) on your machine,
+you can use the Rasa Ephemeral Installer, which installs all the required tools and creates a local Kubernetes cluster that allows you to use the [Rasa OSS Helm chart](https://github.com/RasaHQ/helm-charts/tree/main/charts/rasa).
+
+We recommend this method as an alternative to docker-compose.
+
+## Create a local Kubernetes cluster via REI
+
+The Rasa Ephemeral Installer installs the following tools and creates a local Kubernetes cluster using `kind`.
+
+Tools installed by [REI](https://github.com/RasaHQ/REI):
+
+- [docker](https://www.docker.com/)
+- [kind](https://kind.sigs.k8s.io/)
+- [kubectl](https://kubernetes.io/docs/reference/kubectl/kubectl/)
+- [helm](https://helm.sh/)
+- [rasactl](https://github.com/RasaHQ/rasactl)
+
+1. Execute the following command:
+
+   ```text
+   curl -O https://rei.rasa.com/rei.sh && bash rei.sh -y
+   ```
+
+   After a few minutes, all components should be installed and a local Kubernetes cluster created.
+
+   :::tip
+
+   You can run `kubectl cluster-info` to verify that the cluster is up and running.
+
+   :::
+
+
+## Next steps
+
+Follow the Kubernetes instructions for [deploying using the Rasa Helm Chart](../deploy-rasa.mdx#installation).
diff --git a/docs/docs/docker/building-in-docker.mdx b/docs/docs/docker/building-in-docker.mdx
index 91bf49a56387..4a36d196b980 100644
--- a/docs/docs/docker/building-in-docker.mdx
+++ b/docs/docs/docker/building-in-docker.mdx
@@ -11,7 +11,8 @@ import variables from '../variables.json';
 If you don't have a Rasa project yet, you can build one in Docker without
 having to install Rasa Open Source on your local machine. If you already have a
 model you're satisfied with, see
-[Deploying Your Rasa Assistant](../deploy/introduction.mdx) to learn how to deploy your model.
+[Deploying a Rasa Assistant](../deploy/introduction.mdx) to learn how to deploy your model.
+ ## Installing Docker diff --git a/docs/docs/tracker-stores.mdx b/docs/docs/tracker-stores.mdx index 6f5359151152..7e43af4b751a 100644 --- a/docs/docs/tracker-stores.mdx +++ b/docs/docs/tracker-stores.mdx @@ -155,7 +155,7 @@ Then build the docker image: Now you can configure the tracker store in the `endpoints.yml` as described above, and start the container. The `dialect` parameter with this setup will be `oracle+cx_oracle`. -Read more about [Deploying Your Rasa Assistant](./deploy/introduction.mdx). +Read more about [Deploying a Rasa Assistant](./deploy/introduction.mdx). ## RedisTrackerStore diff --git a/docs/sidebars.js b/docs/sidebars.js index 9dc27a162951..751b562e781f 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -38,17 +38,31 @@ module.exports = { 'tuning-your-model', 'testing-your-assistant', 'setting-up-ci-cd', - { - type: 'category', - label: 'Deploying Your Assistant', - collapsed: true, - items: ['deploy/introduction', 'deploy/deploy-rasa', 'deploy/deploy-action-server', 'deploy/deploy-rasa-x'], - }, ], }, "glossary", ], }, + { + type: 'category', + label: 'Deploying Assistants', + collapsed: true, + items: [ + 'deploy/introduction', + 'deploy/deploy-rasa', + 'deploy/deploy-action-server', + 'deploy/deploy-rasa-x', + { + type: 'category', + label: 'Deployment Tools', + collapsed: true, + items: [ + 'deploy/rei/using-rei' + + ], + } + ], + }, { type: 'category', label: 'Concepts', diff --git a/poetry.lock b/poetry.lock index 3e1e35bd6cd5..097253c519f1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1505,21 +1505,21 @@ python-versions = "*" [[package]] name = "mypy" -version = "0.910" +version = "0.931" description = "Optional static typing for Python" category = "dev" optional = false -python-versions = ">=3.5" +python-versions = ">=3.6" [package.dependencies] -mypy-extensions = ">=0.4.3,<0.5.0" -toml = "*" -typed-ast = {version = ">=1.4.0,<1.5.0", markers = "python_version < \"3.8\""} -typing-extensions = ">=3.7.4" +mypy-extensions = ">=0.4.3" +tomli = ">=1.1.0" +typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} +typing-extensions = ">=3.10" [package.extras] dmypy = ["psutil (>=4.0)"] -python2 = ["typed-ast (>=1.4.0,<1.5.0)"] +python2 = ["typed-ast (>=1.4.0,<2)"] [[package]] name = "mypy-extensions" @@ -3241,6 +3241,14 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "types-redis" +version = "4.1.16" +description = "Typing stubs for redis" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "types-requests" version = "2.27.9" @@ -3446,7 +3454,7 @@ transformers = ["transformers"] [metadata] lock-version = "1.1" python-versions = ">=3.7,<3.9" -content-hash = "c483c3745aa5791421ff95373e2508cbbd5f61dcc75f8e95f9e0d92cd2c985f9" +content-hash = "a5728f4862dfc3cb154339b6801ccece732aba2422313b16d00a65e55c947e54" [metadata.files] absl-py = [ @@ -4262,9 +4270,6 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad"}, {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d"}, {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646"}, - {file = 
"MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4dc8f9fb58f7364b63fd9f85013b780ef83c11857ae79f2feda41e270468dd9b"}, - {file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20dca64a3ef2d6e4d5d615a3fd418ad3bde77a47ec8a23d984a12b5b4c74491a"}, - {file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cdfba22ea2f0029c9261a4bd07e830a8da012291fbe44dc794e488b6c9bb353a"}, {file = "MarkupSafe-2.0.1-cp310-cp310-win32.whl", hash = "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28"}, {file = "MarkupSafe-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"}, @@ -4276,9 +4281,6 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145"}, - {file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:deb993cacb280823246a026e3b2d81c493c53de6acfd5e6bfe31ab3402bb37dd"}, - {file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:63f3268ba69ace99cab4e3e3b5840b03340efed0948ab8f78d2fd87ee5442a4f"}, - {file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:8d206346619592c6200148b01a2142798c989edcb9c896f9ac9722a99d4e77e6"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567"}, @@ -4290,9 +4292,6 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864"}, - {file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d6c7ebd4e944c85e2c3421e612a7057a2f48d478d79e61800d81468a8d842207"}, - {file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f0567c4dc99f264f49fe27da5f735f414c4e7e7dd850cfd8e69f0862d7c74ea9"}, - {file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:89c687013cb1cd489a0f0ac24febe8c7a666e6e221b783e53ac50ebf68e45d86"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415"}, {file = 
"MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914"}, {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9"}, @@ -4305,9 +4304,6 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b"}, {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a"}, {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6"}, - {file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aca6377c0cb8a8253e493c6b451565ac77e98c2951c45f913e0b52facdcff83f"}, - {file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:04635854b943835a6ea959e948d19dcd311762c5c0c6e1f0e16ee57022669194"}, - {file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6300b8454aa6930a24b9618fbb54b5a68135092bc666f7b06901f897fa5c2fee"}, {file = "MarkupSafe-2.0.1-cp38-cp38-win32.whl", hash = "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64"}, {file = "MarkupSafe-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833"}, {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26"}, @@ -4320,9 +4316,6 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1"}, {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac"}, {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6"}, - {file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4296f2b1ce8c86a6aea78613c34bb1a672ea0e3de9c6ba08a960efe0b0a09047"}, - {file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f02365d4e99430a12647f09b6cc8bab61a6564363f313126f775eb4f6ef798e"}, - {file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5b6d930f030f8ed98e3e6c98ffa0652bdb82601e7a016ec2ab5d7ff23baa78d1"}, {file = "MarkupSafe-2.0.1-cp39-cp39-win32.whl", hash = "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74"}, {file = "MarkupSafe-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8"}, {file = "MarkupSafe-2.0.1.tar.gz", hash = "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a"}, @@ -4509,29 +4502,26 @@ murmurhash = [ {file = "murmurhash-1.0.6.tar.gz", hash = "sha256:00a5252b569d3f914b5bd0bce72d2efe9c0fb91a9703556ea1b608b141c68f2d"}, ] mypy = [ - {file = "mypy-0.910-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457"}, - {file = "mypy-0.910-cp35-cp35m-manylinux1_x86_64.whl", 
hash = "sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb"}, - {file = "mypy-0.910-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9"}, - {file = "mypy-0.910-cp35-cp35m-win_amd64.whl", hash = "sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e"}, - {file = "mypy-0.910-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921"}, - {file = "mypy-0.910-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6"}, - {file = "mypy-0.910-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212"}, - {file = "mypy-0.910-cp36-cp36m-win_amd64.whl", hash = "sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885"}, - {file = "mypy-0.910-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0"}, - {file = "mypy-0.910-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de"}, - {file = "mypy-0.910-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703"}, - {file = "mypy-0.910-cp37-cp37m-win_amd64.whl", hash = "sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a"}, - {file = "mypy-0.910-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504"}, - {file = "mypy-0.910-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9"}, - {file = "mypy-0.910-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072"}, - {file = "mypy-0.910-cp38-cp38-win_amd64.whl", hash = "sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811"}, - {file = "mypy-0.910-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e"}, - {file = "mypy-0.910-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b"}, - {file = "mypy-0.910-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2"}, - {file = "mypy-0.910-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97"}, - {file = "mypy-0.910-cp39-cp39-win_amd64.whl", hash = "sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8"}, - {file = "mypy-0.910-py3-none-any.whl", hash = "sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d"}, - {file = "mypy-0.910.tar.gz", hash = "sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150"}, + {file = "mypy-0.931-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3c5b42d0815e15518b1f0990cff7a705805961613e701db60387e6fb663fe78a"}, + {file = "mypy-0.931-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c89702cac5b302f0c5d33b172d2b55b5df2bede3344a2fbed99ff96bddb2cf00"}, + {file = "mypy-0.931-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:300717a07ad09525401a508ef5d105e6b56646f7942eb92715a1c8d610149714"}, + {file = "mypy-0.931-cp310-cp310-win_amd64.whl", hash = 
"sha256:7b3f6f557ba4afc7f2ce6d3215d5db279bcf120b3cfd0add20a5d4f4abdae5bc"}, + {file = "mypy-0.931-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:1bf752559797c897cdd2c65f7b60c2b6969ffe458417b8d947b8340cc9cec08d"}, + {file = "mypy-0.931-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4365c60266b95a3f216a3047f1d8e3f895da6c7402e9e1ddfab96393122cc58d"}, + {file = "mypy-0.931-cp36-cp36m-win_amd64.whl", hash = "sha256:1b65714dc296a7991000b6ee59a35b3f550e0073411ac9d3202f6516621ba66c"}, + {file = "mypy-0.931-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e839191b8da5b4e5d805f940537efcaa13ea5dd98418f06dc585d2891d228cf0"}, + {file = "mypy-0.931-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:50c7346a46dc76a4ed88f3277d4959de8a2bd0a0fa47fa87a4cde36fe247ac05"}, + {file = "mypy-0.931-cp37-cp37m-win_amd64.whl", hash = "sha256:d8f1ff62f7a879c9fe5917b3f9eb93a79b78aad47b533911b853a757223f72e7"}, + {file = "mypy-0.931-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f9fe20d0872b26c4bba1c1be02c5340de1019530302cf2dcc85c7f9fc3252ae0"}, + {file = "mypy-0.931-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1b06268df7eb53a8feea99cbfff77a6e2b205e70bf31743e786678ef87ee8069"}, + {file = "mypy-0.931-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8c11003aaeaf7cc2d0f1bc101c1cc9454ec4cc9cb825aef3cafff8a5fdf4c799"}, + {file = "mypy-0.931-cp38-cp38-win_amd64.whl", hash = "sha256:d9d2b84b2007cea426e327d2483238f040c49405a6bf4074f605f0156c91a47a"}, + {file = "mypy-0.931-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ff3bf387c14c805ab1388185dd22d6b210824e164d4bb324b195ff34e322d166"}, + {file = "mypy-0.931-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5b56154f8c09427bae082b32275a21f500b24d93c88d69a5e82f3978018a0266"}, + {file = "mypy-0.931-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8ca7f8c4b1584d63c9a0f827c37ba7a47226c19a23a753d52e5b5eddb201afcd"}, + {file = "mypy-0.931-cp39-cp39-win_amd64.whl", hash = "sha256:74f7eccbfd436abe9c352ad9fb65872cc0f1f0a868e9d9c44db0893440f0c697"}, + {file = "mypy-0.931-py3-none-any.whl", hash = "sha256:1171f2e0859cfff2d366da2c7092b06130f232c636a3f7301e3feb8b41f6377d"}, + {file = "mypy-0.931.tar.gz", hash = "sha256:0038b21890867793581e4cb0d810829f5fd4441aa75796b53033af3aa30430ce"}, ] mypy-extensions = [ {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, @@ -5709,6 +5699,10 @@ types-pytz = [ {file = "types-pytz-2021.3.5.tar.gz", hash = "sha256:fef8de238ee95135952229a2a23bfb87bd63d5a6c8598106a46cfcf48f069ea8"}, {file = "types_pytz-2021.3.5-py3-none-any.whl", hash = "sha256:8831f689379ac9e2a62668157381379ed74b3702980e08e71f8673c179c4e3c7"}, ] +types-redis = [ + {file = "types-redis-4.1.16.tar.gz", hash = "sha256:a913521c1f008775fc3816813a5981f9da3b0dd3f3b2578b0a0464a84ac5f4d4"}, + {file = "types_redis-4.1.16-py3-none-any.whl", hash = "sha256:a529fbae3b6c95e6790522d35a3065dc92ee29698c6b163ab573992b6144b41a"}, +] types-requests = [ {file = "types-requests-2.27.9.tar.gz", hash = "sha256:7368974534d297939492efdfdab232930440b11e2203f6df1f0c40e3242a87ea"}, {file = "types_requests-2.27.9-py3-none-any.whl", hash = "sha256:74070045418faf710f3154403d6a16c9e67db50e5119906ca6955f1658d20f7b"}, diff --git a/pyproject.toml b/pyproject.toml index 
cadd331e735c..d8113e4cdf63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -149,8 +149,8 @@ towncrier = "^21.3.0" toml = "^0.10.0" pep440-version-utils = "^0.3.0" pydoc-markdown = "^3.10.3" -pytest-timeout = "^2.1.0" -mypy = "^0.910" +pytest-timeout = "^1.4.2" +mypy = "^0.931" bandit = "^1.6.3" types-pkg-resources = "^0.1.3" types-pytz = "^2021.1.0" @@ -164,6 +164,7 @@ sanic-testing = "^0.8.0" analytics-python = "^1.4.0" datadog-api-client = "^1.7.0" datadog = "^0.43.0" +types-redis = "^4.1.16" [tool.poetry.extras] spacy = [ "spacy",] diff --git a/rasa/cli/data.py b/rasa/cli/data.py index 9c6d00b952ff..3b56cc620815 100644 --- a/rasa/cli/data.py +++ b/rasa/cli/data.py @@ -197,7 +197,6 @@ def validate_stories(args: argparse.Namespace) -> None: def _validate_domain(validator: "Validator") -> bool: return ( validator.verify_domain_validity() - and validator.verify_domain_duplicates() and validator.verify_actions_in_stories_rules() and validator.verify_forms_in_stories_rules() and validator.verify_form_slots() diff --git a/rasa/core/actions/action.py b/rasa/core/actions/action.py index fcf621d10db2..87d286218a77 100644 --- a/rasa/core/actions/action.py +++ b/rasa/core/actions/action.py @@ -1,7 +1,17 @@ import copy import json import logging -from typing import List, Text, Optional, Dict, Any, TYPE_CHECKING, Tuple, Set, Union +from typing import ( + List, + Text, + Optional, + Dict, + Any, + TYPE_CHECKING, + Tuple, + Set, + cast, +) import aiohttp import rasa.core @@ -387,10 +397,15 @@ def get_full_retrieval_name( Full retrieval name of the action if the last user utterance contains a response selector output, `None` otherwise. """ - if RESPONSE_SELECTOR_PROPERTY_NAME not in tracker.latest_message.parse_data: + latest_message = tracker.latest_message + + if latest_message is None: + return None + + if RESPONSE_SELECTOR_PROPERTY_NAME not in latest_message.parse_data: return None - response_selector_properties = tracker.latest_message.parse_data[ + response_selector_properties = latest_message.parse_data[ RESPONSE_SELECTOR_PROPERTY_NAME ] @@ -418,7 +433,12 @@ async def run( domain: "Domain", ) -> List[Event]: """Query the appropriate response and create a bot utterance with that.""" - response_selector_properties = tracker.latest_message.parse_data[ + latest_message = tracker.latest_message + + if latest_message is None: + return [] + + response_selector_properties = latest_message.parse_data[ RESPONSE_SELECTOR_PROPERTY_NAME ] @@ -719,7 +739,7 @@ async def run( logger.debug( "Calling action endpoint to run action '{}'.".format(self.name()) ) - response = await self.action_endpoint.request( + response: Any = await self.action_endpoint.request( json=json_body, method="post", timeout=DEFAULT_REQUEST_TIMEOUT ) @@ -1091,7 +1111,7 @@ async def _execute_validation_action( tracker: "DialogueStateTracker", domain: "Domain", ) -> List[Event]: - slot_events: List[Union[Event, SlotSet]] = [ + slot_events: List[SlotSet] = [ event for event in extraction_events if isinstance(event, SlotSet) ] @@ -1099,11 +1119,11 @@ async def _execute_validation_action( logger.debug(f"Validating extracted slots: {slot_candidates}") if ACTION_VALIDATE_SLOT_MAPPINGS not in domain.user_actions: - return slot_events + return cast(List[Event], slot_events) _tracker = DialogueStateTracker.from_events( tracker.sender_id, - tracker.events_after_latest_restart() + slot_events, + tracker.events_after_latest_restart() + cast(List[Event], slot_events), slots=domain.slots, ) validate_events = await self._run_custom_action( @@ -1159,7 
+1179,7 @@ async def run( ) -> List[Event]: """Runs action. Please see parent class for the full docstring.""" slot_events: List[Event] = [] - executed_custom_actions = set() + executed_custom_actions: Set[Text] = set() user_slots = [ slot for slot in domain.slots if slot.name not in DEFAULT_SLOT_NAMES @@ -1267,6 +1287,8 @@ def extract_slot_value_from_predefined_mapping( elif should_fill_intent_slot or should_fill_trigger_slot: value = [mapping.get("value")] elif should_fill_text_slot: - value = [tracker.latest_message.text] + value = [ + tracker.latest_message.text if tracker.latest_message is not None else None + ] return value diff --git a/rasa/core/actions/forms.py b/rasa/core/actions/forms.py index d1a66c273995..dd88b68aa18d 100644 --- a/rasa/core/actions/forms.py +++ b/rasa/core/actions/forms.py @@ -104,8 +104,8 @@ def get_mappings_for_slot( If None, map requested slot to an entity with the same name """ - domain_slots = domain.as_dict().get(KEY_SLOTS) - requested_slot_mappings = domain_slots.get(slot_to_fill).get("mappings") + domain_slots = domain.as_dict().get(KEY_SLOTS, {}) + requested_slot_mappings = domain_slots.get(slot_to_fill, {}).get("mappings", []) # check provided slot mappings for requested_slot_mapping in requested_slot_mappings: @@ -147,11 +147,11 @@ def _create_unique_entity_mappings(self, domain: Domain) -> Set[Text]: Returns: A set of json dumps of unique mappings of type `from_entity`. """ - unique_entity_slot_mappings = set() - duplicate_entity_slot_mappings = set() - domain_slots = domain.as_dict().get(KEY_SLOTS) + unique_entity_slot_mappings: Set[Text] = set() + duplicate_entity_slot_mappings: Set[Text] = set() + domain_slots = domain.as_dict().get(KEY_SLOTS, {}) for slot in domain.required_slots_for_form(self.name()): - for slot_mapping in domain_slots.get(slot).get(SLOT_MAPPINGS): + for slot_mapping in domain_slots.get(slot, {}).get(SLOT_MAPPINGS, []): if slot_mapping.get(MAPPING_TYPE) == str(SlotMappingType.FROM_ENTITY): mapping_as_string = json.dumps(slot_mapping, sort_keys=True) if mapping_as_string in unique_entity_slot_mappings: @@ -360,7 +360,7 @@ def _get_slot_extractions( events_since_last_user_uttered = FormAction._get_events_since_last_user_uttered( tracker ) - slot_values = {} + slot_values: Dict[Text, Any] = {} required_slots = self._add_dynamic_slots_requested_by_dynamic_forms( tracker, domain @@ -504,8 +504,8 @@ async def _ask_for_slot( ) -> List[Event]: logger.debug(f"Request next slot '{slot_name}'") - action_to_ask_for_next_slot = self._name_of_utterance(domain, slot_name) - if not action_to_ask_for_next_slot: + action_name_to_ask_for_next_slot = self._name_of_utterance(domain, slot_name) + if not action_name_to_ask_for_next_slot: # Use a debug log as the user might have asked as part of a custom action logger.debug( f"There was no action found to ask for slot '{slot_name}' " @@ -514,7 +514,7 @@ async def _ask_for_slot( return [] action_to_ask_for_next_slot = action.action_for_name_or_text( - action_to_ask_for_next_slot, domain, self.action_endpoint + action_name_to_ask_for_next_slot, domain, self.action_endpoint ) return await action_to_ask_for_next_slot.run( output_channel, nlg, tracker, domain diff --git a/rasa/core/agent.py b/rasa/core/agent.py index 444cb67b76e0..b0f1ff018205 100644 --- a/rasa/core/agent.py +++ b/rasa/core/agent.py @@ -19,7 +19,7 @@ from rasa.core.exceptions import AgentNotReady from rasa.shared.constants import DEFAULT_SENDER_ID from rasa.core.lock_store import InMemoryLockStore, LockStore -from rasa.core.nlg import 
NaturalLanguageGenerator +from rasa.core.nlg import NaturalLanguageGenerator, TemplatedNaturalLanguageGenerator from rasa.core.policies.policy import PolicyPrediction from rasa.core.processor import MessageProcessor from rasa.core.tracker_store import FailSafeTrackerStore, InMemoryTrackerStore @@ -292,7 +292,7 @@ class Agent: def __init__( self, - domain: Optional[Union[Text, Domain]] = None, + domain: Optional[Domain] = None, generator: Union[EndpointConfig, NaturalLanguageGenerator, None] = None, tracker_store: Optional[TrackerStore] = None, lock_store: Optional[LockStore] = None, @@ -320,7 +320,7 @@ def __init__( def load( cls, model_path: Union[Text, Path], - domain: Optional[Union[Text, Domain]] = None, + domain: Optional[Domain] = None, generator: Union[EndpointConfig, NaturalLanguageGenerator, None] = None, tracker_store: Optional[TrackerStore] = None, lock_store: Optional[LockStore] = None, @@ -363,7 +363,7 @@ def load_model( # update domain on all instances self.tracker_store.domain = self.domain - if hasattr(self.nlg, "responses"): + if isinstance(self.nlg, TemplatedNaturalLanguageGenerator): self.nlg.responses = self.domain.responses if self.domain else {} @property @@ -405,7 +405,8 @@ async def parse_message(self, message_data: Text) -> Dict[Text, Any]: """ message = UserMessage(message_data) - return await self.processor.parse_message(message) + + return await self.processor.parse_message(message) # type: ignore[union-attr] async def handle_message( self, message: UserMessage @@ -416,14 +417,18 @@ async def handle_message( return None async with self.lock_store.lock(message.sender_id): - return await self.processor.handle_message(message) + return await self.processor.handle_message( # type: ignore[union-attr] + message + ) @agent_must_be_ready async def predict_next_for_sender_id( self, sender_id: Text ) -> Optional[Dict[Text, Any]]: """Predict the next action for a sender id.""" - return await self.processor.predict_next_for_sender_id(sender_id) + return await self.processor.predict_next_for_sender_id( # type: ignore[union-attr] # noqa:E501 + sender_id + ) @agent_must_be_ready def predict_next_with_tracker( @@ -432,12 +437,14 @@ def predict_next_with_tracker( verbosity: EventVerbosity = EventVerbosity.AFTER_RESTART, ) -> Optional[Dict[Text, Any]]: """Predicts the next action.""" - return self.processor.predict_next_with_tracker(tracker, verbosity) + return self.processor.predict_next_with_tracker( # type: ignore[union-attr] + tracker, verbosity + ) @agent_must_be_ready async def log_message(self, message: UserMessage) -> DialogueStateTracker: """Append a message to a dialogue - does not predict actions.""" - return await self.processor.log_message(message) + return await self.processor.log_message(message) # type: ignore[union-attr] @agent_must_be_ready async def execute_action( @@ -452,7 +459,7 @@ async def execute_action( prediction = PolicyPrediction.for_action_name( self.domain, action, policy, confidence or 0.0 ) - return await self.processor.execute_action( + return await self.processor.execute_action( # type: ignore[union-attr] sender_id, action, output_channel, self.nlg, prediction ) @@ -465,7 +472,7 @@ async def trigger_intent( tracker: DialogueStateTracker, ) -> None: """Trigger a user intent, e.g. 
triggered by an external event.""" - await self.processor.trigger_external_user_uttered( + await self.processor.trigger_external_user_uttered( # type: ignore[union-attr] intent_name, entities, tracker, output_channel ) diff --git a/rasa/core/brokers/kafka.py b/rasa/core/brokers/kafka.py index 878d93e381b6..adb829251e03 100644 --- a/rasa/core/brokers/kafka.py +++ b/rasa/core/brokers/kafka.py @@ -2,7 +2,7 @@ import json import logging from asyncio import AbstractEventLoop -from typing import Any, Text, List, Optional, Union, Dict +from typing import Any, Text, List, Optional, Union, Dict, TYPE_CHECKING import time from rasa.core.brokers.broker import EventBroker @@ -11,6 +11,9 @@ from rasa.shared.exceptions import RasaException import rasa.shared.utils.common +if TYPE_CHECKING: + from kafka import KafkaProducer + logger = logging.getLogger(__name__) @@ -105,7 +108,7 @@ def publish( ) -> None: """Publishes events.""" if self.producer is None: - self._create_producer() + self.producer = self._create_producer() connected = self.producer.bootstrap_connected() if connected: logger.debug("Connection to kafka successful.") @@ -125,7 +128,7 @@ def publish( if not connected: self._close() logger.debug("Connection to kafka lost, reconnecting...") - self._create_producer() + self.producer = self._create_producer() connected = self.producer.bootstrap_connected() if connected: logger.debug("Reconnection to kafka successful") @@ -135,7 +138,7 @@ def publish( logger.error("Failed to publish Kafka event.") - def _create_producer(self) -> None: + def _create_producer(self) -> "KafkaProducer": import kafka if self.security_protocol == "PLAINTEXT": @@ -175,7 +178,7 @@ def _create_producer(self) -> None: ) try: - self.producer = kafka.KafkaProducer( + return kafka.KafkaProducer( client_id=self.client_id, bootstrap_servers=self.url, value_serializer=lambda v: json.dumps(v).encode(DEFAULT_ENCODING), @@ -206,10 +209,14 @@ def _publish(self, event: Dict[Text, Any]) -> None: f" key={partition_key!s}, headers={headers})" ) - self.producer.send(self.topic, value=event, key=partition_key, headers=headers) + if self.producer is not None: + self.producer.send( + self.topic, value=event, key=partition_key, headers=headers + ) def _close(self) -> None: - self.producer.close() + if self.producer is not None: + self.producer.close() @rasa.shared.utils.common.lazy_property def rasa_environment(self) -> Optional[Text]: diff --git a/rasa/core/brokers/pika.py b/rasa/core/brokers/pika.py index b094f6affff2..205a3bbca030 100644 --- a/rasa/core/brokers/pika.py +++ b/rasa/core/brokers/pika.py @@ -244,6 +244,9 @@ async def close(self) -> None: def is_ready(self) -> bool: """Return `True` if a connection was established.""" + if self._connection is None: + return False + return not self._connection.is_closed def publish( @@ -262,6 +265,9 @@ def publish( async def _publish( self, event: Dict[Text, Any], headers: Optional[Dict[Text, Text]] = None ) -> None: + if self._exchange is None: + return + try: await self._exchange.publish(self._message(event, headers), "") diff --git a/rasa/core/channels/channel.py b/rasa/core/channels/channel.py index 5fca3ad9868e..2efd680b183f 100644 --- a/rasa/core/channels/channel.py +++ b/rasa/core/channels/channel.py @@ -330,10 +330,12 @@ class CollectingOutputChannel(OutputChannel): (doesn't send them anywhere, just collects them).""" def __init__(self) -> None: - self.messages = [] + """Initialise list to collect messages.""" + self.messages: List[Dict[Text, Any]] = [] @classmethod def name(cls) -> Text: 
+ """Name of the channel.""" return "collector" @staticmethod diff --git a/rasa/core/channels/console.py b/rasa/core/channels/console.py index 00ed6b2f7a4b..520a385b7a8e 100644 --- a/rasa/core/channels/console.py +++ b/rasa/core/channels/console.py @@ -3,13 +3,12 @@ import json import logging import os +from typing import Any, AsyncGenerator, Dict, List, Optional, Text import aiohttp import questionary from aiohttp import ClientTimeout from prompt_toolkit.styles import Style -from typing import Any, Generator -from typing import Text, Optional, Dict, List import rasa.shared.utils.cli import rasa.shared.utils.io @@ -130,7 +129,7 @@ async def _send_message_receive_stream( sender_id: Text, message: Text, request_timeout: Optional[int] = None, -) -> Generator[Dict[Text, Any], None, None]: +) -> AsyncGenerator[Dict[Text, Any], None]: payload = {"sender": sender_id, "message": message} url = f"{server_url}/webhooks/rest/webhook?stream=true&token={auth_token}" @@ -189,11 +188,11 @@ async def record_messages( break if use_response_stream: - bot_responses = _send_message_receive_stream( + bot_responses_stream = _send_message_receive_stream( server_url, auth_token, sender_id, text, request_timeout=request_timeout ) previous_response = None - async for response in bot_responses: + async for response in bot_responses_stream: if previous_response is not None: _print_bot_output(previous_response) previous_response = response diff --git a/rasa/core/channels/hangouts.py b/rasa/core/channels/hangouts.py index 1a7c19a96867..093ac3f93658 100644 --- a/rasa/core/channels/hangouts.py +++ b/rasa/core/channels/hangouts.py @@ -33,7 +33,7 @@ def name(cls) -> Text: def __init__(self) -> None: """Starts messages as empty dictionary.""" - self.messages = {} + self.messages: Dict[Text, Any] = {} @staticmethod def _text_card(message: Dict[Text, Any]) -> Dict: @@ -297,7 +297,7 @@ async def health(request: Request) -> HTTPResponse: async def receive(request: Request) -> HTTPResponse: if self.project_id: - token = request.headers.get("Authorization").replace("Bearer ", "") + token = request.headers.get("Authorization", "").replace("Bearer ", "") self._check_token(token) sender_id = self._extract_sender(request) diff --git a/rasa/core/channels/rest.py b/rasa/core/channels/rest.py index f70c5edb71a5..66b05fb09130 100644 --- a/rasa/core/channels/rest.py +++ b/rasa/core/channels/rest.py @@ -67,7 +67,7 @@ def stream_response( metadata: Optional[Dict[Text, Any]], ) -> Callable[[Any], Awaitable[None]]: async def stream(resp: Any) -> None: - q = Queue() + q: Queue = Queue() task = asyncio.ensure_future( self.on_message_wrapper( on_new_message, text, q, sender_id, input_channel, metadata @@ -86,9 +86,16 @@ async def stream(resp: Any) -> None: def blueprint( self, on_new_message: Callable[[UserMessage], Awaitable[None]] ) -> Blueprint: + """Groups the collection of endpoints used by rest channel.""" + module_type = inspect.getmodule(self) + if module_type is not None: + module_name = module_type.__name__ + else: + module_name = None + custom_webhook = Blueprint( "custom_webhook_{}".format(type(self).__name__), - inspect.getmodule(self).__name__, + module_name, ) # noinspection PyUnusedLocal @@ -145,8 +152,11 @@ class QueueOutputChannel(CollectingOutputChannel): (doesn't send them anywhere, just collects them).""" + messages: Queue + @classmethod def name(cls) -> Text: + """Name of QueueOutputChannel.""" return "queue" # noinspection PyMissingConstructor diff --git a/rasa/core/channels/slack.py b/rasa/core/channels/slack.py index 
4a8570368d89..644bd09a6c3f 100644 --- a/rasa/core/channels/slack.py +++ b/rasa/core/channels/slack.py @@ -93,15 +93,17 @@ async def send_text_with_buttons( ) return await self.send_text_message(recipient, text, **kwargs) - button_block = {"type": "actions", "elements": []} - for button in buttons: - button_block["elements"].append( + button_block = { + "type": "actions", + "elements": [ { "type": "button", "text": {"type": "plain_text", "text": button["title"]}, "value": button["payload"], } - ) + for button in buttons + ], + } await self._post_message( channel=recipient, diff --git a/rasa/core/channels/socketio.py b/rasa/core/channels/socketio.py index 3753a8fac897..70474f9320c4 100644 --- a/rasa/core/channels/socketio.py +++ b/rasa/core/channels/socketio.py @@ -83,14 +83,14 @@ async def send_text_with_buttons( messages = [{"text": message, "quick_replies": []} for message in message_parts] # attach all buttons to the last text fragment - for button in buttons: - messages[-1]["quick_replies"].append( - { - "content_type": "text", - "title": button["title"], - "payload": button["payload"], - } - ) + messages[-1]["quick_replies"] = [ + { + "content_type": "text", + "title": button["title"], + "payload": button["payload"], + } + for button in buttons + ] for message in messages: await self._send_message(recipient_id, message) diff --git a/rasa/core/channels/twilio_voice.py b/rasa/core/channels/twilio_voice.py index 5131910f55c9..85d1c9dbb7b3 100644 --- a/rasa/core/channels/twilio_voice.py +++ b/rasa/core/channels/twilio_voice.py @@ -117,9 +117,9 @@ def __init__( initial_prompt: Optional[Text], reprompt_fallback_phrase: Optional[Text], assistant_voice: Optional[Text], - speech_timeout: Optional[Text], - speech_model: Optional[Text], - enhanced: Optional[Text], + speech_timeout: Text = "5", + speech_model: Text = "default", + enhanced: Text = "false", ) -> None: """Creates a connection to Twilio voice. @@ -154,16 +154,21 @@ def _validate_configuration(self) -> None: if self.speech_model not in self.SUPPORTED_SPEECH_MODELS: self._raise_invalid_speech_model_exception() - if self.enhanced.lower() not in ["true", "false"]: + if self.enhanced.lower() not in [ + "true", + "false", + ]: self._raise_invalid_enhanced_option_exception() - if (self.enhanced.lower() == "true") and ( - self.speech_model.lower() != "phone_call" + if ( + self.enhanced.lower() == "true" + and self.speech_model.lower() != "phone_call" ): self._raise_invalid_enhanced_speech_model_exception() - if (self.speech_model.lower() != "numbers_and_commands") and ( - self.speech_timeout.lower() == "auto" + if ( + self.speech_model.lower() != "numbers_and_commands" + and self.speech_timeout.lower() == "auto" ): self._raise_invalid_speech_model_timeout_exception() diff --git a/rasa/core/evaluation/marker_base.py b/rasa/core/evaluation/marker_base.py index 225af97c7031..6f2cb806dcfb 100644 --- a/rasa/core/evaluation/marker_base.py +++ b/rasa/core/evaluation/marker_base.py @@ -11,6 +11,7 @@ Tuple, Type, TypeVar, + TYPE_CHECKING, Union, Any, ) @@ -36,6 +37,9 @@ import csv import os.path +if TYPE_CHECKING: + from rasa.core.evaluation.marker import OrMarker + logger = logging.getLogger(__name__) @@ -273,9 +277,7 @@ def max_depth(self) -> int: """Gets the maximum depth from this point in the marker tree.""" ... 
- def evaluate_events( - self, events: List[Event], recursive: bool = False - ) -> List[SessionEvaluation]: + def evaluate_events(self, events: List[Event]) -> List[SessionEvaluation]: """Resets the marker, tracks all events, and collects some information. The collected information includes: @@ -285,21 +287,15 @@ def evaluate_events( If this marker is the special `ANY_MARKER` (identified by its name), then results will be collected for all (immediate) sub-markers. - If `recursive` is set to `True`, then all included markers are evaluated. - Args: events: a list of events describing a conversation - recursive: set this to `True` to collect evaluations for all markers that - this marker consists of Returns: a list that contains, for each session contained in the tracker, a dictionary mapping that maps marker names to meta data of relevant events """ # determine which marker to extract results from - if recursive: - markers_to_be_evaluated = [marker for marker in self] - elif isinstance(self, OperatorMarker) and self.name == Marker.ANY_MARKER: + if isinstance(self, OperatorMarker) and self.name == Marker.ANY_MARKER: markers_to_be_evaluated = self.sub_markers else: markers_to_be_evaluated = [self] @@ -395,7 +391,7 @@ def relevant_events(self) -> List[int]: return [idx for (idx, applies) in enumerate(self.history) if applies] @classmethod - def from_path(cls, path: Union[Path, Text]) -> Marker: + def from_path(cls, path: Union[Path, Text]) -> "OrMarker": """Loads markers from one config file or all config files in a directory tree. Each config file should contain a dictionary mapping marker names to the @@ -493,7 +489,7 @@ def _collect_yaml_files_from_path(path: Union[Text, Path]) -> List[Text]: @staticmethod def _collect_configs_from_yaml_files(yaml_files: List[Text]) -> Dict[Text, Dict]: - marker_names = set() + marker_names: Set[Text] = set() loaded_configs: Dict[Text, Dict] = {} for yaml_file in yaml_files: loaded_config = rasa.shared.utils.io.read_yaml_file(yaml_file) diff --git a/rasa/core/evaluation/marker_stats.py b/rasa/core/evaluation/marker_stats.py index 2812a0a811ab..737263fd524c 100644 --- a/rasa/core/evaluation/marker_stats.py +++ b/rasa/core/evaluation/marker_stats.py @@ -72,7 +72,7 @@ def _add_num_user_turns_str_to(stat_name: Text) -> Text: def __init__(self) -> None: """Creates a new marker statistics object.""" # to ensure consistency of processed rows - self._marker_names = [] + self._marker_names: List[Text] = [] # (1) For collecting the per-session analysis: # NOTE: we could stream / compute them later instead of collecting them... 
diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index c9e657aed473..646dd29bbbf1 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -39,9 +39,9 @@ class SingleStateFeaturizer: def __init__(self) -> None: """Initialize the single state featurizer.""" - self._default_feature_states = {} - self.action_texts = [] - self.entity_tag_specs = [] + self._default_feature_states: Dict[Text, Any] = {} + self.action_texts: List[Text] = [] + self.entity_tag_specs: List[EntityTagSpec] = [] def _create_entity_tag_specs( self, bilou_tagging: bool = False @@ -309,9 +309,11 @@ def encode_entities( ): # we cannot build a classifier with fewer than 2 classes return {} - - message = precomputations.lookup_message(user_text=entity_data[TEXT]) - message.data[ENTITIES] = entity_data[ENTITIES] + if precomputations is None: + message = None + else: + message = precomputations.lookup_message(user_text=entity_data[TEXT]) + message.data[ENTITIES] = entity_data[ENTITIES] if not message: return {} diff --git a/rasa/core/featurizers/tracker_featurizers.py b/rasa/core/featurizers/tracker_featurizers.py index 1abb310f4de3..7375191e576c 100644 --- a/rasa/core/featurizers/tracker_featurizers.py +++ b/rasa/core/featurizers/tracker_featurizers.py @@ -105,13 +105,16 @@ def _featurize_states( Returns: Featurized tracker states. """ - return [ - [ - self.state_featurizer.encode_state(state, precomputations) - for state in tracker_states + if self.state_featurizer is None: + return [[{}]] + else: + return [ + [ + self.state_featurizer.encode_state(state, precomputations) + for state in tracker_states + ] + for tracker_states in trackers_as_states ] - for tracker_states in trackers_as_states - ] @staticmethod def _convert_labels_to_ids( @@ -152,15 +155,18 @@ def _create_entity_tags( Returns: Trackers as entity features. 
""" - return [ - [ - self.state_featurizer.encode_entities( - entity_data, precomputations, bilou_tagging - ) - for entity_data in trackers_entities + if self.state_featurizer is None: + return [[{}]] + else: + return [ + [ + self.state_featurizer.encode_entities( + entity_data, precomputations, bilou_tagging + ) + for entity_data in trackers_entities + ] + for trackers_entities in trackers_as_entities ] - for trackers_entities in trackers_as_entities - ] @staticmethod def _entity_data(event: UserUttered) -> Dict[Text, Any]: @@ -1056,7 +1062,7 @@ def _extract_examples( tracker_states[:label_index], self.max_history ) label = [event.intent_name or event.text] - entities = [{}] + entities: List[Dict[Text, Any]] = [{}] yield sliced_states, label, entities diff --git a/rasa/core/lock.py b/rasa/core/lock.py index d148440001df..7195ecbb1794 100644 --- a/rasa/core/lock.py +++ b/rasa/core/lock.py @@ -23,13 +23,11 @@ def as_dict(self) -> Dict[Text, Any]: def dumps(self) -> Text: """Return json dump of `Ticket` as dictionary.""" - return json.dumps(self.as_dict()) @classmethod def from_dict(cls, data: Dict[Text, Union[int, float]]) -> "Ticket": """Creates `Ticket` from dictionary.""" - return cls(number=data["number"], expires=data["expires"]) def __repr__(self) -> Text: @@ -53,13 +51,11 @@ def __init__( @classmethod def from_dict(cls, data: Dict[Text, Any]) -> "TicketLock": """Create `TicketLock` from dictionary.""" - - tickets = [Ticket.from_dict(json.loads(d)) for d in data.get("tickets")] + tickets = [Ticket.from_dict(json.loads(d)) for d in data.get("tickets", [])] return cls(data.get("conversation_id"), deque(tickets)) def dumps(self) -> Text: """Return json dump of `TicketLock`.""" - tickets = [ticket.dumps() for ticket in self.tickets] return json.dumps(dict(conversation_id=self.conversation_id, tickets=tickets)) @@ -69,12 +65,10 @@ def is_locked(self, ticket_number: int) -> bool: Returns: True if `now_serving` is not equal to `ticket`. """ - return self.now_serving != ticket_number def issue_ticket(self, lifetime: float) -> int: """Issue a new ticket and return its number.""" - self.remove_expired_tickets() number = self.last_issued + 1 ticket = Ticket(number, time.time() + lifetime) @@ -84,7 +78,6 @@ def issue_ticket(self, lifetime: float) -> int: def remove_expired_tickets(self) -> None: """Remove expired tickets.""" - # iterate over copy of self.tickets so we can remove items for ticket in list(self.tickets): if ticket.has_expired(): @@ -98,7 +91,6 @@ def last_issued(self) -> int: Number of `Ticket` that was last added. `NO_TICKET_ISSUED` if no tickets exist. """ - ticket_number = self._ticket_number_for(-1) return ticket_number if ticket_number is not None else NO_TICKET_ISSUED @@ -110,7 +102,6 @@ def now_serving(self) -> Optional[int]: Returns: Number of `Ticket` that is served next. 0 if no `Ticket` exists. """ - return self._ticket_number_for(0) or 0 def _ticket_number_for(self, ticket_index: int) -> Optional[int]: @@ -120,7 +111,6 @@ def _ticket_number_for(self, ticket_index: int) -> Optional[int]: Ticket number for `Ticket` with index `ticket_index`. None if there are no tickets, or if `ticket_index` is out of bounds of `self.tickets`. 
""" - self.remove_expired_tickets() try: @@ -130,7 +120,6 @@ def _ticket_number_for(self, ticket_index: int) -> Optional[int]: def _ticket_for_ticket_number(self, ticket_number: int) -> Optional[Ticket]: """Return ticket for `ticket_number`.""" - self.remove_expired_tickets() return next((t for t in self.tickets if t.number == ticket_number), None) @@ -141,12 +130,10 @@ def is_someone_waiting(self) -> bool: Returns: True if the `self.tickets` queue has length greater than 0. """ - return len(self.tickets) > 0 def remove_ticket_for(self, ticket_number: int) -> None: """Remove `Ticket` for `ticket_number.""" - ticket = self._ticket_for_ticket_number(ticket_number) if ticket: self.tickets.remove(ticket) diff --git a/rasa/core/lock_store.py b/rasa/core/lock_store.py index ac78da3df30f..e691b2ff2c48 100644 --- a/rasa/core/lock_store.py +++ b/rasa/core/lock_store.py @@ -4,7 +4,7 @@ import os from async_generator import asynccontextmanager -from typing import Text, Union, Optional, AsyncGenerator +from typing import AsyncGenerator, Dict, Optional, Text, Union from rasa.shared.exceptions import RasaException, ConnectionException import rasa.shared.utils.common @@ -274,19 +274,23 @@ class InMemoryLockStore(LockStore): """In-memory store for ticket locks.""" def __init__(self) -> None: - self.conversation_locks = {} + """Initialise dictionary of locks.""" + self.conversation_locks: Dict[Text, TicketLock] = {} super().__init__() def get_lock(self, conversation_id: Text) -> Optional[TicketLock]: + """Get lock for conversation if it exists.""" return self.conversation_locks.get(conversation_id) def delete_lock(self, conversation_id: Text) -> None: + """Delete lock for conversation.""" deleted_lock = self.conversation_locks.pop(conversation_id, None) self._log_deletion( conversation_id, deletion_successful=deleted_lock is not None ) def save_lock(self, lock: TicketLock) -> None: + """Save lock in store.""" self.conversation_locks[lock.conversation_id] = lock diff --git a/rasa/core/migrate.py b/rasa/core/migrate.py index c454d9f444ae..fcc8f1998a67 100644 --- a/rasa/core/migrate.py +++ b/rasa/core/migrate.py @@ -3,6 +3,8 @@ from pathlib import Path from typing import List, Dict, Text, Any, Tuple, Optional, Union +from ruamel.yaml.scalarstring import DoubleQuotedScalarString + import rasa.shared.utils.io import rasa.shared.utils.cli from rasa.shared.constants import REQUIRED_SLOTS_KEY, IGNORED_INTENTS @@ -13,19 +15,21 @@ MAPPING_TYPE, SLOT_MAPPINGS, ) +from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION from rasa.shared.core.domain import KEY_ENTITIES, KEY_SLOTS, KEY_FORMS, Domain from rasa.shared.exceptions import RasaException +from rasa.shared.utils.validation import KEY_TRAINING_DATA_FORMAT_VERSION ORIGINAL_DOMAIN = "original_domain" # not a default, fixed DEFAULT_NEW_DOMAIN = "new_domain" YML_SUFFIX = ".yml" -def _create_back_up( - domain_file: Path, backup_location: Path -) -> Union[List[Any], Dict[Text, Any]]: +def _create_back_up(domain_file: Path, backup_location: Path) -> Dict[Text, Any]: """Makes a backup and returns the content of the file.""" - original_content = rasa.shared.utils.io.read_yaml_file(domain_file) + original_content = rasa.shared.utils.io.read_yaml( + rasa.shared.utils.io.read_file(domain_file) + ) rasa.shared.utils.io.write_yaml( original_content, backup_location, should_preserve_key_order=True ) @@ -164,7 +168,9 @@ def _migrate_auto_fill_and_custom_slots( def _assemble_new_domain( domain_file: Path, new_forms: Dict[Text, Any], new_slots: Dict[Text, Any] ) -> 
Dict[Text, Any]: - original_content = rasa.shared.utils.io.read_yaml_file(domain_file) + original_content = rasa.shared.utils.io.read_yaml( + rasa.shared.utils.io.read_file(domain_file) + ) new_domain: Dict[Text, Any] = {} for key, value in original_content.items(): if key == KEY_SLOTS: @@ -172,7 +178,9 @@ def _assemble_new_domain( elif key == KEY_FORMS: new_domain.update({key: new_forms}) elif key == "version": - new_domain.update({key: '"3.0"'}) + new_domain.update( + {key: DoubleQuotedScalarString(LATEST_TRAINING_DATA_FORMAT_VERSION)} + ) else: new_domain.update({key: value}) return new_domain @@ -205,9 +213,9 @@ def _migrate_domain_files( backup_location: where to backup all domain files out_path: location where to store the migrated files """ - slots = {} - forms = {} - entities = [] + slots: Dict[Text, Any] = {} + forms: Dict[Text, Any] = {} + entities: List[Any] = [] domain_files = [ file for file in domain_path.iterdir() if Domain.is_domain_file(file) @@ -226,7 +234,13 @@ def _migrate_domain_files( if KEY_SLOTS not in original_content and KEY_FORMS not in original_content: if isinstance(original_content, dict): - original_content.update({"version": '"3.0"'}) + original_content.update( + { + "version": DoubleQuotedScalarString( + LATEST_TRAINING_DATA_FORMAT_VERSION + ) + } + ) # this is done so that the other domain files can be moved # in the migrated directory @@ -250,7 +264,7 @@ def _migrate_domain_files( slots.update(original_content.get(KEY_SLOTS, {})) forms.update(original_content.get(KEY_FORMS, {})) - entities.extend(original_content.get(KEY_ENTITIES, {})) + entities.extend(original_content.get(KEY_ENTITIES, [])) if not slots or not forms: raise RasaException( @@ -315,15 +329,28 @@ def migrate_domain_format( # Note: we do not enforce that the version tag is 2.0 everywhere + validate that # migrate-able domain files are among these files later original_files = ( - [file for file in domain_path.iterdir() if Domain.is_domain_file(file)] + { + file: rasa.shared.utils.io.read_yaml_file(file) + for file in domain_path.iterdir() + if Domain.is_domain_file(file) + } if domain_path.is_dir() - else [domain_path] + else {domain_path: rasa.shared.utils.io.read_yaml_file(domain_path)} ) - migrated_files = [ - file - for file in original_files - if rasa.shared.utils.io.read_yaml_file(file).get("version") == "3.0" - ] + migrated_files = [] + + for file, file_dict in original_files.items(): + if not isinstance(file_dict, dict): + raise RasaException( + f"The file {file} could not be read " + f"as an eligible domain dictionary. " + f"Please make sure you have included " + f"only eligible domain files." 
+ ) + + if file_dict.get(KEY_TRAINING_DATA_FORMAT_VERSION) == "3.0": + migrated_files.append(file) + if migrated_files: raise RasaException( f"Some of the given files ({[file for file in migrated_files]}) " diff --git a/rasa/core/nlg/response.py b/rasa/core/nlg/response.py index 2da05d2130af..4db27427e147 100644 --- a/rasa/core/nlg/response.py +++ b/rasa/core/nlg/response.py @@ -30,7 +30,7 @@ def _matches_filled_slots( self, filled_slots: Dict[Text, Any], response: Dict[Text, Any] ) -> bool: """Checks if the conditional response variation matches the filled slots.""" - constraints = response.get(RESPONSE_CONDITION) + constraints = response.get(RESPONSE_CONDITION, []) for constraint in constraints: name = constraint["name"] value = constraint["value"] diff --git a/rasa/core/policies/memoization.py b/rasa/core/policies/memoization.py index 405578e25f52..251995fb29d3 100644 --- a/rasa/core/policies/memoization.py +++ b/rasa/core/policies/memoization.py @@ -104,7 +104,7 @@ def _create_lookup_from_states( Returns: lookup dictionary """ - lookup = {} + lookup: Dict[Text, Text] = {} if not trackers_as_states: return lookup @@ -213,7 +213,10 @@ def _prediction_result( ) -> List[float]: result = self._default_predictions(domain) if action_name: - if self.config["use_nlu_confidence_as_score"]: + if ( + self.config["use_nlu_confidence_as_score"] + and tracker.latest_message is not None + ): # the memoization will use the confidence of NLU on the latest # user message to set the confidence of the action score = tracker.latest_message.intent.get("confidence", 1.0) diff --git a/rasa/core/policies/rule_policy.py b/rasa/core/policies/rule_policy.py index f2c7251ea074..7e56a952e00d 100644 --- a/rasa/core/policies/rule_policy.py +++ b/rasa/core/policies/rule_policy.py @@ -159,7 +159,9 @@ def __init__( self._enable_fallback_prediction = config["enable_fallback_prediction"] self._check_for_contradictions = config["check_for_contradictions"] - self._rules_sources = defaultdict(list) + self._rules_sources: defaultdict[Text, List[Tuple[Text, Text]]] = defaultdict( + list + ) @classmethod def raise_if_incompatible_with_domain( @@ -190,7 +192,7 @@ def _is_rule_snippet_state(state: State) -> bool: return prev_action_name == RULE_SNIPPET_ACTION_NAME def _create_feature_key(self, states: List[State]) -> Optional[Text]: - new_states = [] + new_states: List[State] = [] for state in reversed(states): if self._is_rule_snippet_state(state): # remove all states before RULE_SNIPPET_ACTION_NAME @@ -493,13 +495,15 @@ def _collect_sources( tracker: TrackerWithCachedStates, predicted_action_name: Optional[Text], gold_action_name: Text, - prediction_source: Optional[Text], + prediction_source: Text, ) -> None: # we need to remember which action should be predicted by the rule # in order to correctly output the names of the contradicting rules rule_name = tracker.sender_id - if prediction_source.startswith(DEFAULT_RULES) or prediction_source.startswith( - LOOP_RULES + + if prediction_source is not None and ( + prediction_source.startswith(DEFAULT_RULES) + or prediction_source.startswith(LOOP_RULES) ): # the real gold action contradict the one in the rules in this case gold_action_name = predicted_action_name @@ -564,7 +568,14 @@ def _check_prediction( gold_action_name: Text, prediction_source: Optional[Text], ) -> List[Text]: - if not predicted_action_name or predicted_action_name == gold_action_name: + # FIXME: `predicted_action_name` and `prediction_source` are + # either None together or defined together. 
This could be improved + # by better typing in this class, but requires some refactoring + if ( + not predicted_action_name + or not prediction_source + or predicted_action_name == gold_action_name + ): return [] if self._should_delete(prediction_source, tracker, predicted_action_name): @@ -636,12 +647,13 @@ def _run_prediction_on_trackers( running_tracker, domain, gold_action_name ) if collect_sources: - self._collect_sources( - running_tracker, - predicted_action_name, - gold_action_name, - prediction_source, - ) + if prediction_source: + self._collect_sources( + running_tracker, + predicted_action_name, + gold_action_name, + prediction_source, + ) else: # to be able to remove only rules turns from the dialogue history # for ML policies, @@ -946,6 +958,7 @@ def _find_action_from_loop_happy_path( active_loop_rejected = tracker.active_loop.get(LOOP_REJECTED) should_predict_loop = ( not active_loop_rejected + and tracker.latest_action and tracker.latest_action.get(ACTION_NAME) != active_loop_name ) should_predict_listen = ( diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 3c6539d41636..a6f10bf1b948 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -12,6 +12,7 @@ from rasa.engine.graph import ExecutionContext from rasa.engine.storage.resource import Resource from rasa.engine.storage.storage import ModelStorage +from rasa.exceptions import ModelNotFound from rasa.nlu.constants import TOKENS_NAMES from rasa.nlu.extractors.extractor import EntityTagSpec, EntityExtractorMixin import rasa.core.actions.action @@ -412,7 +413,11 @@ def _create_label_data( ) -> Tuple[RasaModelData, List[Dict[Text, List[Features]]]]: # encode all label_ids with policies' featurizer state_featurizer = self.featurizer.state_featurizer - encoded_all_labels = state_featurizer.encode_all_labels(domain, precomputations) + encoded_all_labels = ( + state_featurizer.encode_all_labels(domain, precomputations) + if state_featurizer is not None + else [] + ) attribute_data, _ = convert_to_data_format( encoded_all_labels, featurizers=self.config[FEATURIZERS] @@ -616,7 +621,11 @@ def _prepare_for_training( ) if self.config[ENTITY_RECOGNITION]: - self._entity_tag_specs = self.featurizer.state_featurizer.entity_tag_specs + self._entity_tag_specs = ( + self.featurizer.state_featurizer.entity_tag_specs + if self.featurizer.state_featurizer is not None + else [] + ) # keep one example for persisting and loading self.data_example = model_data.first_data_example() @@ -665,6 +674,10 @@ def run_training( self.config[TENSORBOARD_LOG_LEVEL], self.tmp_checkpoint_dir, ) + + if self.model is None: + raise ModelNotFound("No model was detected prior to training.") + self.model.fit( data_generator, epochs=self.config[EPOCHS], @@ -876,7 +889,7 @@ def _create_optional_event_for_entities( # entities belong to the last message of the tracker # convert the predicted tags to actual entities - text = tracker.latest_message.text + text = tracker.latest_message.text if tracker.latest_message is not None else "" if precomputations is not None: parsed_message = precomputations.lookup_message(user_text=text) else: @@ -944,7 +957,8 @@ def persist_model_utilities(self, model_path: Path) -> None: model_path / f"{model_filename}.fake_features.pkl", self.fake_features ) rasa.utils.io.pickle_dump( - model_path / f"{model_filename}.label_data.pkl", dict(self._label_data.data) + model_path / f"{model_filename}.label_data.pkl", + dict(self._label_data.data) if self._label_data is not None else 
{}, ) entity_tag_specs = ( [tag_spec._asdict() for tag_spec in self._entity_tag_specs] diff --git a/rasa/core/policies/unexpected_intent_policy.py b/rasa/core/policies/unexpected_intent_policy.py index eae372ade64e..132dbaca48ab 100644 --- a/rasa/core/policies/unexpected_intent_policy.py +++ b/rasa/core/policies/unexpected_intent_policy.py @@ -421,7 +421,11 @@ def compute_label_quantiles_post_training( # Hence, we first filter out the attributes inside `model_data` # to keep only those which should be present during prediction. model_prediction_data = self._prepare_data_for_prediction(model_data) - prediction_scores = self.model.run_bulk_inference(model_prediction_data) + prediction_scores = ( + self.model.run_bulk_inference(model_prediction_data) + if self.model is not None + else {} + ) label_id_scores = self._collect_label_id_grouped_scores( prediction_scores, label_ids ) @@ -608,7 +612,12 @@ def predict_action_probabilities( sequence_similarities = all_similarities[:, -1, :] # Check for unlikely intent - query_intent = tracker.get_last_event_for(UserUttered).intent_name + last_user_uttered_event = tracker.get_last_event_for(UserUttered) + query_intent = ( + last_user_uttered_event.intent_name + if last_user_uttered_event is not None + else "" + ) is_unlikely_intent = self._check_unlikely_intent( domain, sequence_similarities, query_intent ) @@ -771,7 +780,7 @@ def _collect_label_id_grouped_scores( if LABEL_PAD_ID in unique_label_ids: unique_label_ids.remove(LABEL_PAD_ID) - label_id_scores = { + label_id_scores: Dict[int, Dict[Text, List[float]]] = { label_id: {POSITIVE_SCORES_KEY: [], NEGATIVE_SCORES_KEY: []} for label_id in unique_label_ids } diff --git a/rasa/core/processor.py b/rasa/core/processor.py index 38518141f75c..0123188c60f9 100644 --- a/rasa/core/processor.py +++ b/rasa/core/processor.py @@ -511,7 +511,7 @@ async def handle_reminder( ) else: intent = reminder_event.intent - entities = reminder_event.entities or {} + entities: Union[List[Dict], Dict] = reminder_event.entities or {} await self.trigger_external_user_uttered( intent, entities, tracker, output_channel ) @@ -705,9 +705,10 @@ async def _handle_message_with_tracker( @staticmethod def _should_handle_message(tracker: DialogueStateTracker) -> bool: - return ( - not tracker.is_paused() - or tracker.latest_message.intent.get(INTENT_NAME_KEY) == USER_INTENT_RESTART + return not tracker.is_paused() or ( + tracker.latest_message is not None + and tracker.latest_message.intent.get(INTENT_NAME_KEY) + == USER_INTENT_RESTART ) def is_action_limit_reached( diff --git a/rasa/core/test.py b/rasa/core/test.py index bca372455508..8fabcaaf1d1d 100644 --- a/rasa/core/test.py +++ b/rasa/core/test.py @@ -434,7 +434,8 @@ def _create_data_generator( from rasa.shared.core.generator import TrainingDataGenerator tmp_domain_path = Path(tempfile.mkdtemp()) / "domain.yaml" - agent.domain.persist(tmp_domain_path) + domain = agent.domain if agent.domain is not None else Domain.empty() + domain.persist(tmp_domain_path) test_data_importer = TrainingDataImporter.load_from_dict( training_data_paths=[resource_name], domain_path=str(tmp_domain_path) ) @@ -823,14 +824,25 @@ async def _predict_tracker_actions( ]: processor = agent.processor + if agent.processor is not None: + processor = agent.processor + else: + raise RasaException( + "The agent's processor has not been instantiated. " + "The processor needs to be defined before running " + "prediction." 
+ ) + tracker_eval_store = EvaluationStore() events = list(tracker.events) + slots = agent.domain.slots if agent.domain is not None else [] + partial_tracker = DialogueStateTracker.from_events( tracker.sender_id, events[:1], - agent.domain.slots, + slots, sender_source=tracker.sender_source, ) tracker_actions = [] diff --git a/rasa/core/tracker_store.py b/rasa/core/tracker_store.py index dcd2b364527e..4c563cd67162 100644 --- a/rasa/core/tracker_store.py +++ b/rasa/core/tracker_store.py @@ -85,7 +85,7 @@ def __init__( destination. kwargs: Additional kwargs. """ - self.domain = domain + self.domain = domain or Domain.empty() self.event_broker = event_broker self.max_event_history = None @@ -219,7 +219,10 @@ def retrieve_full_tracker( return self.retrieve(conversation_id) def stream_events(self, tracker: DialogueStateTracker) -> None: - """Streams events to a message broker""" + """Streams events to a message broker.""" + if self.event_broker is None: + return None + offset = self.number_of_existing_events(tracker.sender_id) events = tracker.events for event in list(itertools.islice(events, offset, len(events))): @@ -273,13 +276,12 @@ def __init__( event_broker: Optional[EventBroker] = None, **kwargs: Dict[Text, Any], ) -> None: - self.store = {} + self.store: Dict[Text, Text] = {} super().__init__(domain, event_broker, **kwargs) def save(self, tracker: DialogueStateTracker) -> None: - """Updates and saves the current conversation state""" - if self.event_broker: - self.stream_events(tracker) + """Updates and saves the current conversation state.""" + self.stream_events(tracker) serialised = InMemoryTrackerStore.serialise_tracker(tracker) self.store[tracker.sender_id] = serialised @@ -327,6 +329,7 @@ def __init__( ssl_keyfile=ssl_keyfile, ssl_certfile=ssl_certfile, ssl_ca_certs=ssl_ca_certs, + decode_responses=True, ) self.record_exp = record_exp @@ -353,8 +356,7 @@ def save( self, tracker: DialogueStateTracker, timeout: Optional[float] = None ) -> None: """Saves the current conversation state.""" - if self.event_broker: - self.stream_events(tracker) + self.stream_events(tracker) if not timeout and self.record_exp: timeout = self.record_exp @@ -444,8 +446,7 @@ def get_or_create_table( def save(self, tracker: DialogueStateTracker) -> None: """Saves the current conversation state.""" - if self.event_broker: - self.stream_events(tracker) + self.stream_events(tracker) serialized = self.serialise_tracker(tracker) self.db.put_item(Item=serialized) @@ -476,9 +477,12 @@ def retrieve(self, sender_id: Text) -> Optional[DialogueStateTracker]: # `float`s are stored as `Decimal` objects - we need to convert them back events_with_floats = core_utils.replace_decimals_with_floats(events) - return DialogueStateTracker.from_dict( - sender_id, events_with_floats, self.domain.slots - ) + if self.domain is None: + slots = [] + else: + slots = self.domain.slots + + return DialogueStateTracker.from_dict(sender_id, events_with_floats, slots) def keys(self) -> Iterable[Text]: """Returns sender_ids of the `DynamoTrackerStore`.""" @@ -552,8 +556,7 @@ def _current_tracker_state_without_events(tracker: DialogueStateTracker) -> Dict def save(self, tracker: DialogueStateTracker) -> None: """Saves the current conversation state.""" - if self.event_broker: - self.stream_events(tracker) + self.stream_events(tracker) additional_events = self._additional_events(tracker) @@ -678,7 +681,6 @@ def _create_sequence(table_name: Text) -> "Sequence": Returns: A `Sequence` object """ - from sqlalchemy.ext.declarative import 
declarative_base sequence_name = f"{table_name}_seq" @@ -1065,9 +1067,7 @@ def _event_query( def save(self, tracker: DialogueStateTracker) -> None: """Update database with events from the current conversation.""" - - if self.event_broker: - self.stream_events(tracker) + self.stream_events(tracker) with self.session_scope() as session: # only store recent events diff --git a/rasa/core/training/converters/responses_prefix_converter.py b/rasa/core/training/converters/responses_prefix_converter.py index f16edd2fb7f9..8be36e933831 100644 --- a/rasa/core/training/converters/responses_prefix_converter.py +++ b/rasa/core/training/converters/responses_prefix_converter.py @@ -109,7 +109,7 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None: output_path: Path to the output directory. """ domain = Domain.from_path(source_path) - domain_dict = domain.cleaned_domain() + domain_dict = domain.as_dict() domain_dict["actions"] = [ normalize_utter_action(action) for action in domain_dict["actions"] ] @@ -118,4 +118,4 @@ async def convert_and_write(cls, source_path: Path, output_path: Path) -> None: output_file = cls.generate_path_for_converted_training_data_file( source_path, output_path ) - new_domain.persist_clean(output_file) + new_domain.persist(output_file) diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py index 34a5a262ceeb..e7de83415663 100644 --- a/rasa/core/training/interactive.py +++ b/rasa/core/training/interactive.py @@ -50,7 +50,13 @@ from rasa.core import run, utils import rasa.core.train from rasa.core.constants import DEFAULT_SERVER_FORMAT, DEFAULT_SERVER_PORT -from rasa.shared.core.domain import Domain +from rasa.shared.core.domain import ( + Domain, + KEY_INTENTS, + KEY_ENTITIES, + KEY_RESPONSES, + KEY_ACTIONS, +) import rasa.shared.core.events from rasa.shared.core.events import ( ActionExecuted, @@ -108,7 +114,7 @@ OTHER_ACTION = uuid.uuid4().hex NEW_ACTION = uuid.uuid4().hex -NEW_RESPONSES = {} +NEW_RESPONSES: Dict[Text, List[Dict[Text, Any]]] = {} MAX_NUMBER_OF_TRAINING_STORIES_FOR_VISUALIZATION = 200 @@ -312,9 +318,8 @@ async def _ask_questions( is_abort: Callable[[Dict[Text, Any]], bool] = lambda x: False, ) -> Any: """Ask the user a question, if Ctrl-C is pressed provide user with menu.""" - should_retry = True - answers = {} + answers: Any = {} while should_retry: answers = questions.ask() @@ -329,7 +334,6 @@ def _selection_choices_from_intent_prediction( predictions: List[Dict[Text, Any]] ) -> List[Dict[Text, Any]]: """Given a list of ML predictions create a UI choice list.""" - sorted_intents = sorted( predictions, key=lambda k: (-k["confidence"], k[INTENT_NAME_KEY]) ) @@ -917,7 +921,7 @@ def _write_domain_to_file( messages = _collect_messages(events) actions = _collect_actions(events) - responses = NEW_RESPONSES # type: Dict[Text, List[Dict[Text, Any]]] + responses = NEW_RESPONSES # TODO for now there is no way to distinguish between action and form collected_actions = list( @@ -929,16 +933,16 @@ def _write_domain_to_file( } ) - new_domain = Domain( - intents=_intents_from_messages(messages), - entities=_entities_from_messages(messages), - slots=[], - responses=responses, - action_names=collected_actions, - forms={}, + new_domain = Domain.from_dict( + { + KEY_INTENTS: list(_intents_from_messages(messages)), + KEY_ENTITIES: _entities_from_messages(messages), + KEY_RESPONSES: responses, + KEY_ACTIONS: collected_actions, + } ) - old_domain.merge(new_domain).persist_clean(domain_path) + 
old_domain.merge(new_domain).persist(domain_path) async def _predict_till_next_listen( @@ -952,7 +956,10 @@ async def _predict_till_next_listen( listen = False while not listen: result = await request_prediction(endpoint, conversation_id) - predictions = result.get("scores") or [] + if result is None: + result = {} + + predictions = result.get("scores", []) if not predictions: raise InvalidConfigException( "Cannot continue as no action was predicted by the dialogue manager. " @@ -1473,7 +1480,9 @@ async def record_messages( ) return - intents = [next(iter(i)) for i in (domain.get("intents") or [])] + domain_intents = domain.get("intents", []) if domain is not None else [] + + intents = [next(iter(i)) for i in domain_intents] num_messages = 0 diff --git a/rasa/core/training/story_conflict.py b/rasa/core/training/story_conflict.py index dc43a111a7e5..0bde5528cf05 100644 --- a/rasa/core/training/story_conflict.py +++ b/rasa/core/training/story_conflict.py @@ -38,7 +38,7 @@ def __init__(self, sliced_states: List[State]) -> None: self._sliced_states = sliced_states # A list of actions that all follow from the same state. - self._conflicting_actions = defaultdict( + self._conflicting_actions: defaultdict[Text, List[Text]] = defaultdict( list ) # {"action": ["story_1", ...], ...} @@ -196,7 +196,7 @@ def _find_conflicting_states( """ # Create a 'state -> list of actions' dict, where the state is # represented by its hash - state_action_mapping = defaultdict(list) + state_action_mapping: defaultdict[int, List[int]] = defaultdict(list) for element in _sliced_states_iterator(trackers, domain, max_history, tokenizer): hashed_state = element.sliced_states_hash diff --git a/rasa/core/training/training.py b/rasa/core/training/training.py index dde083940857..9c9af2768504 100644 --- a/rasa/core/training/training.py +++ b/rasa/core/training/training.py @@ -65,7 +65,7 @@ def create_action_fingerprints( # take into account only featurized slots featurized_slots = {slot.name for slot in domain.slots if slot.has_features()} - action_fingerprints = defaultdict(dict) + action_fingerprints: defaultdict[Text, Dict[Text, List[Text]]] = defaultdict(dict) for action_name, events_after_action in events_after_actions.items(): slots = list( set( diff --git a/rasa/engine/graph.py b/rasa/engine/graph.py index 0ad6bf751d7d..f1ab0d62b9e6 100644 --- a/rasa/engine/graph.py +++ b/rasa/engine/graph.py @@ -75,7 +75,7 @@ def as_dict(self) -> Dict[Text, Any]: Returns: The graph schema in a format which can be dumped as JSON or other formats. 
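The hunk just below annotates the initially empty `{"nodes": {}}` literal, the same pattern applied to `NEW_RESPONSES` and the other empty-container annotations in this patch: without a hint, mypy has to infer element types from an empty literal and then flags later writes or the return type. A minimal sketch of the pattern; the `add_node` helper here is a hypothetical stand-in, not part of the real `GraphSchema` API:

from typing import Any, Dict, Text

serializable_graph_schema: Dict[Text, Dict[Text, Any]] = {"nodes": {}}

def add_node(name: Text, serialized_node: Dict[Text, Any]) -> None:
    # writes of concrete node dicts type-check against the declared value type
    serializable_graph_schema["nodes"][name] = serialized_node

add_node("train_step", {"uses": "CountVectorsFeaturizer", "fn": "train"})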
""" - serializable_graph_schema = {"nodes": {}} + serializable_graph_schema: Dict[Text, Dict[Text, Any]] = {"nodes": {}} for node_name, node in self.nodes.items(): serializable = dataclasses.asdict(node) diff --git a/rasa/engine/runner/dask.py b/rasa/engine/runner/dask.py index f2d5d7d511a3..2b17d5e2a9d6 100644 --- a/rasa/engine/runner/dask.py +++ b/rasa/engine/runner/dask.py @@ -105,6 +105,9 @@ def run( @staticmethod def _add_inputs_to_graph(inputs: Optional[Dict[Text, Any]], graph: Any) -> None: + if inputs is None: + return + for input_name, input_value in inputs.items(): if isinstance(input_value, str) and input_value in graph.keys(): raise GraphRunError( diff --git a/rasa/engine/storage/local_model_storage.py b/rasa/engine/storage/local_model_storage.py index 84c2d48d3dfd..0a6138048a3b 100644 --- a/rasa/engine/storage/local_model_storage.py +++ b/rasa/engine/storage/local_model_storage.py @@ -8,7 +8,7 @@ from contextlib import contextmanager from datetime import datetime from pathlib import Path -from typing import Text, ContextManager, Tuple, Union +from typing import Text, Generator, Tuple, Union import rasa.utils.common import rasa.shared.utils.io @@ -112,7 +112,7 @@ def _load_metadata(directory: Path) -> ModelMetadata: return ModelMetadata.from_dict(serialized_metadata) @contextmanager - def write_to(self, resource: Resource) -> ContextManager[Path]: + def write_to(self, resource: Resource) -> Generator[Path, None, None]: """Persists data for a resource (see parent class for full docstring).""" logger.debug(f"Resource '{resource.name}' was requested for writing.") directory = self._directory_for_resource(resource) @@ -128,7 +128,7 @@ def _directory_for_resource(self, resource: Resource) -> Path: return self._storage_path / resource.name @contextmanager - def read_from(self, resource: Resource) -> ContextManager[Path]: + def read_from(self, resource: Resource) -> Generator[Path, None, None]: """Provides the data of a `Resource` (see parent class for full docstring).""" logger.debug(f"Resource '{resource.name}' was requested for reading.") directory = self._directory_for_resource(resource) @@ -164,6 +164,9 @@ def create_model_package( model_metadata = self._create_model_metadata(domain, model_configuration) self._persist_metadata(model_metadata, temporary_directory) + if isinstance(model_archive_path, str): + model_archive_path = Path(model_archive_path) + if not model_archive_path.parent.exists(): model_archive_path.parent.mkdir(parents=True) diff --git a/rasa/engine/storage/storage.py b/rasa/engine/storage/storage.py index a4740ce8898c..8c5d5b48baa2 100644 --- a/rasa/engine/storage/storage.py +++ b/rasa/engine/storage/storage.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from datetime import datetime from pathlib import Path -from typing import Tuple, Union, Text, ContextManager, Dict, Any, Optional +from typing import Tuple, Union, Text, Generator, Dict, Any, Optional from packaging import version from rasa.constants import MINIMUM_COMPATIBLE_VERSION @@ -74,7 +74,7 @@ def metadata_from_archive( @contextmanager @abc.abstractmethod - def write_to(self, resource: Resource) -> ContextManager[Path]: + def write_to(self, resource: Resource) -> Generator[Path, None, None]: """Persists data for a given resource. 
This `Resource` can then be accessed in dependent graph nodes via @@ -90,7 +90,7 @@ def write_to(self, resource: Resource) -> ContextManager[Path]: @contextmanager @abc.abstractmethod - def read_from(self, resource: Resource) -> ContextManager[Path]: + def read_from(self, resource: Resource) -> Generator[Path, None, None]: """Provides the data of a persisted `Resource`. Args: diff --git a/rasa/engine/validation.py b/rasa/engine/validation.py index fe843b2dfb96..b57f227becdb 100644 --- a/rasa/engine/validation.py +++ b/rasa/engine/validation.py @@ -438,9 +438,8 @@ def _validate_needs( ) required_type = available_args.get(param_name) - needs_passed_to_kwargs = has_kwargs and required_type is None - if not needs_passed_to_kwargs: + if not has_kwargs and required_type is not None: parent = None if _is_placeholder_input(parent_name): parent_return_type = RESERVED_PLACEHOLDERS[parent_name] diff --git a/rasa/graph_components/providers/domain_for_core_training_provider.py b/rasa/graph_components/providers/domain_for_core_training_provider.py index ae1455de277f..e3dc17842e76 100644 --- a/rasa/graph_components/providers/domain_for_core_training_provider.py +++ b/rasa/graph_components/providers/domain_for_core_training_provider.py @@ -80,8 +80,8 @@ def create_pruned_version(domain: Domain) -> Domain: serialized_domain.pop("config", None) # `store_entities_as_slots` serialized_domain.pop(SESSION_CONFIG_KEY, None) - for response_name in serialized_domain[KEY_RESPONSES]: + for response_name in serialized_domain.get(KEY_RESPONSES, []): serialized_domain[KEY_RESPONSES][response_name] = [] - for form_name in serialized_domain[KEY_FORMS]: + for form_name in serialized_domain.get(KEY_FORMS, []): serialized_domain[KEY_FORMS][form_name] = {REQUIRED_SLOTS_KEY: []} return Domain.from_dict(serialized_domain) diff --git a/rasa/graph_components/validators/default_recipe_validator.py b/rasa/graph_components/validators/default_recipe_validator.py index 0b5b02a2e5db..59613492defa 100644 --- a/rasa/graph_components/validators/default_recipe_validator.py +++ b/rasa/graph_components/validators/default_recipe_validator.py @@ -1,6 +1,6 @@ from __future__ import annotations from collections import defaultdict -from typing import Iterable, List, Dict, Text, Any, Set, Type +from typing import Iterable, List, Dict, Text, Any, Set, Type, cast from rasa.core.featurizers.precomputation import CoreFeaturizationInputConverter from rasa.engine.graph import ExecutionContext, GraphComponent, GraphSchema, SchemaNode @@ -477,7 +477,7 @@ def _warn_if_rule_based_data_is_unused_or_missing( story_graph: a story graph (core training data) """ consuming_rule_data = any( - policy_node.uses.supported_data() + cast(Policy, policy_node.uses).supported_data() in [SupportedData.RULE_DATA, SupportedData.ML_AND_RULE_DATA] for policy_node in self._policy_schema_nodes ) diff --git a/rasa/jupyter.py b/rasa/jupyter.py index abb40a442906..42cc9addc079 100644 --- a/rasa/jupyter.py +++ b/rasa/jupyter.py @@ -3,6 +3,7 @@ from typing import Any, Dict, Optional, Text import asyncio +from rasa.shared.exceptions import RasaException from rasa.shared.utils.cli import print_success import rasa.core.agent import rasa.utils.common @@ -31,6 +32,12 @@ def chat( if model_path: agent = rasa.core.agent.load_agent(model_path=model_path, endpoints=endpoints) + if agent is None: + raise RasaException( + "Either the provided model path could not load the agent " + "or no core agent was provided." + ) + print("Your bot is ready to talk! 
Type your messages here or send '/stop'.") while True: message = input() diff --git a/rasa/model_testing.py b/rasa/model_testing.py index b9a6fb9334c9..3e66ae43769f 100644 --- a/rasa/model_testing.py +++ b/rasa/model_testing.py @@ -244,7 +244,7 @@ async def compare_nlu_models( bases = [os.path.basename(nlu_config) for nlu_config in configs] model_names = [os.path.splitext(base)[0] for base in bases] - f1_score_results = { + f1_score_results: Dict[Text, List[List[float]]] = { model_name: [[] for _ in range(runs)] for model_name in model_names } diff --git a/rasa/model_training.py b/rasa/model_training.py index a01eac75fa12..ce7b0db600df 100644 --- a/rasa/model_training.py +++ b/rasa/model_training.py @@ -134,8 +134,8 @@ def train( ) return TrainingResult(code=1) - domain = file_importer.get_domain() - if domain.is_empty(): + domain_object = file_importer.get_domain() + if domain_object.is_empty(): rasa.shared.utils.cli.print_warning( "Core training was skipped because no valid domain file was found. " "Only an NLU-model was created. Please specify a valid domain using " diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 790802f07745..ce5cf9af06a2 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -3,6 +3,8 @@ import logging from collections import defaultdict from pathlib import Path + +from rasa.exceptions import ModelNotFound from rasa.nlu.featurizers.featurizer import Featurizer import numpy as np @@ -666,6 +668,9 @@ def _create_label_data( return label_data def _use_default_label_features(self, label_ids: np.ndarray) -> List[FeatureArray]: + if self._label_data is None: + return [] + feature_arrays: List[FeatureArray] = self._label_data.get(LABEL, SENTENCE) all_label_features = feature_arrays[0] return [ @@ -887,6 +892,9 @@ def train(self, training_data: TrainingData) -> Resource: optimizer=tf.keras.optimizers.Adam(self.component_config[LEARNING_RATE]) ) else: + if self.model is None: + raise ModelNotFound("Model could not be found. 
") + self.model.adjust_for_incremental_training( data_example=self._data_example, new_sparse_feature_sizes=model_data.get_sparse_feature_sizes(), @@ -946,7 +954,7 @@ def _predict_label( ) -> Tuple[Dict[Text, Any], List[Dict[Text, Any]]]: """Predicts the intent of the provided message.""" label: Dict[Text, Any] = {"name": None, "confidence": 0.0} - label_ranking = [] + label_ranking: List[Dict[Text, Any]] = [] if predict_out is None: return label, label_ranking @@ -1058,7 +1066,8 @@ def persist(self) -> None: self._sparse_feature_sizes, ) io_utils.pickle_dump( - model_path / f"{file_name}.label_data.pkl", dict(self._label_data.data) + model_path / f"{file_name}.label_data.pkl", + dict(self._label_data.data) if self._label_data is not None else {}, ) io_utils.json_pickle( model_path / f"{file_name}.index_label_id_mapping.json", diff --git a/rasa/nlu/classifiers/sklearn_intent_classifier.py b/rasa/nlu/classifiers/sklearn_intent_classifier.py index 087aae9826f6..4166ab37254a 100644 --- a/rasa/nlu/classifiers/sklearn_intent_classifier.py +++ b/rasa/nlu/classifiers/sklearn_intent_classifier.py @@ -15,6 +15,7 @@ from rasa.engine.storage.storage import ModelStorage from rasa.shared.constants import DOCS_URL_TRAINING_DATA_NLU from rasa.nlu.classifiers import LABEL_RANKING_LENGTH +from rasa.shared.exceptions import RasaException from rasa.shared.nlu.constants import TEXT from rasa.nlu.classifiers.classifier import IntentClassifier from rasa.shared.nlu.training_data.training_data import TrainingData @@ -63,7 +64,7 @@ def __init__( config: Dict[Text, Any], model_storage: ModelStorage, resource: Resource, - clf: "sklearn.model_selection.GridSearchCV" = None, + clf: Optional["sklearn.model_selection.GridSearchCV"] = None, le: Optional["sklearn.preprocessing.LabelEncoder"] = None, ) -> None: """Construct a new intent classifier using the sklearn framework.""" @@ -195,7 +196,7 @@ def _create_classifier( def process(self, messages: List[Message]) -> List[Message]: """Return the most likely intent and its probability for a message.""" for message in messages: - if not self.clf or not message.features_present( + if self.clf is None or not message.features_present( attribute=TEXT, featurizers=self.component_config.get(FEATURIZERS) ): # component is either not trained or didn't @@ -240,6 +241,11 @@ def predict_prob(self, X: np.ndarray) -> np.ndarray: :param X: bow of input text :return: vector of probabilities containing one entry for each label. """ + if self.clf is None: + raise RasaException( + "Sklearn intent classifier has not been initialised and trained." 
+ ) + return self.clf.predict_proba(X) def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: diff --git a/rasa/nlu/emulators/luis.py b/rasa/nlu/emulators/luis.py index 97192164689a..b2653548cb65 100644 --- a/rasa/nlu/emulators/luis.py +++ b/rasa/nlu/emulators/luis.py @@ -43,7 +43,7 @@ def _entities( if ENTITIES not in data: return {} - entities = {"$instance": {}} + entities: Dict[Text, Dict[Text, List[Dict[Text, Any]]]] = {"$instance": {}} for e in data[ENTITIES]: # LUIS API v3 uses entity roles instead of entity names # (it's possible because its roles are unique): diff --git a/rasa/nlu/emulators/wit.py b/rasa/nlu/emulators/wit.py index bdbc80589684..1f5c9c0d89e8 100644 --- a/rasa/nlu/emulators/wit.py +++ b/rasa/nlu/emulators/wit.py @@ -35,7 +35,7 @@ def normalise_response_json(self, data: Dict[Text, Any]) -> Dict[Text, Any]: entity_name = entity[ENTITY_ATTRIBUTE_TYPE] role = entity.get(ENTITY_ATTRIBUTE_ROLE, entity_name) entity_name_including_role = f"{entity[ENTITY_ATTRIBUTE_TYPE]}:{role}" - normalized_entity = { + normalized_entity: Dict[Text, Any] = { "confidence": entity.get("confidence_entity") or 1, "name": entity_name, "value": entity[ENTITY_ATTRIBUTE_VALUE], diff --git a/rasa/nlu/extractors/crf_entity_extractor.py b/rasa/nlu/extractors/crf_entity_extractor.py index fc6f5bcf65d3..1332c250d55a 100644 --- a/rasa/nlu/extractors/crf_entity_extractor.py +++ b/rasa/nlu/extractors/crf_entity_extractor.py @@ -292,7 +292,7 @@ def extract_entities(self, message: Message) -> List[Dict[Text, Any]]: tokens = message.get(TOKENS_NAMES[TEXT]) crf_tokens = self._convert_to_crf_tokens(message) - predictions = {} + predictions: Dict[Text, List[Dict[Text, float]]] = {} for tag_name, entity_tagger in self.entity_taggers.items(): # use predicted entity tags as features for second level CRFs include_tag_features = tag_name != ENTITY_ATTRIBUTE_TYPE diff --git a/rasa/nlu/extractors/mitie_entity_extractor.py b/rasa/nlu/extractors/mitie_entity_extractor.py index 8472fdb4005b..2a2705f665f4 100644 --- a/rasa/nlu/extractors/mitie_entity_extractor.py +++ b/rasa/nlu/extractors/mitie_entity_extractor.py @@ -231,9 +231,12 @@ def _extract_entities( entities = [] token_texts = [token.text for token in tokens] - mitie_entities = self._ner.extract_entities( - token_texts, mitie_model.word_feature_extractor - ) + if self._ner is None: + mitie_entities = [] + else: + mitie_entities = self._ner.extract_entities( + token_texts, mitie_model.word_feature_extractor + ) for e in mitie_entities: if len(e[0]): start = tokens[e[0][0]].start diff --git a/rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py b/rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py index f5b5a3eb7b02..9c89c6ed87d5 100644 --- a/rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py +++ b/rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py @@ -150,7 +150,7 @@ def _load_model_instance(self) -> None: self.tokenizer = model_tokenizer_dict[self.model_name].from_pretrained( self.model_weights, cache_dir=self.cache_dir ) - self.model = model_class_dict[self.model_name].from_pretrained( + self.model = model_class_dict[self.model_name].from_pretrained( # type: ignore[no-untyped-call] # noqa: E501 self.model_weights, cache_dir=self.cache_dir ) diff --git a/rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py b/rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py index 945851d31c85..221f1bf260f6 100644 --- a/rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +++ 
b/rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py @@ -44,6 +44,8 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent): from https://arxiv.org/abs/1810.07150. """ + OOV_words: List[Text] + @classmethod def required_components(cls) -> List[Type]: """Components that should be included in the pipeline before this component.""" @@ -551,8 +553,8 @@ def _create_features( if not self.vectorizers.get(attribute): return [None], [None] - sequence_features = [] - sentence_features = [] + sequence_features: List[Optional[scipy.sparse.spmatrix]] = [] + sentence_features: List[Optional[scipy.sparse.spmatrix]] = [] for i, tokens in enumerate(all_tokens): if not tokens: diff --git a/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py b/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py index 205946cb37f8..97e11b54089b 100644 --- a/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +++ b/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py @@ -4,7 +4,18 @@ import scipy.sparse import numpy as np -from typing import Any, Dict, Text, List, Tuple, Callable, Set, Optional, Type, Union +from typing import ( + Any, + Dict, + Text, + List, + Tuple, + Callable, + Set, + Optional, + Type, + Union, +) from rasa.engine.graph import ExecutionContext, GraphComponent from rasa.engine.recipes.default_recipe import DefaultV1Recipe @@ -359,9 +370,9 @@ def _build_feature_to_index_map( """ # Note that this will only sort the top level keys - and we keep # doing it to ensure consistently with what was done before) - ordered_feature_vocabulary: OrderedDict[ - Tuple[int, Text], Set[Text] - ] = OrderedDict(sorted(feature_vocabulary.items())) + ordered_feature_vocabulary: Dict[Tuple[int, Text], Set[Text]] = OrderedDict( + sorted(feature_vocabulary.items()) + ) # create the nested mapping feature_to_idx_dict: Dict[Tuple[int, Text], Dict[Text, int]] = {} diff --git a/rasa/nlu/persistor.py b/rasa/nlu/persistor.py index 33a1d497af49..057c8da851f6 100644 --- a/rasa/nlu/persistor.py +++ b/rasa/nlu/persistor.py @@ -162,7 +162,10 @@ class GCSPersistor(Persistor): Fetches them when needed, instead of storing them on the local disk.""" def __init__(self, bucket_name: Text) -> None: - from google.cloud import storage + """Initialise class with client and bucket.""" + # there are no type hints in this repo for now + # https://github.com/googleapis/python-storage/issues/393 + from google.cloud import storage # type: ignore[attr-defined] super().__init__() diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index a68a7278792a..f3ba95fdd08c 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -357,7 +357,7 @@ def _warn_about_transformer_and_hidden_layers_enabled( self.component_config[HIDDEN_LAYERS_SIZES] == default_config[HIDDEN_LAYERS_SIZES] ) - config_for_disabling_hidden_layers = { + config_for_disabling_hidden_layers: Dict[Text, List[Any]] = { k: [] for k, _ in default_config[HIDDEN_LAYERS_SIZES].items() } # warn if the hidden layers aren't disabled diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py index 28b82d9993f1..34aa05306a10 100644 --- a/rasa/nlu/test.py +++ b/rasa/nlu/test.py @@ -1730,7 +1730,7 @@ async def compute_metrics( response_selection_results ) - intent_metrics = {} + intent_metrics: IntentMetrics = {} if intent_results: intent_metrics = _compute_metrics( intent_results, "intent_target", "intent_prediction" @@ -1740,7 +1740,7 @@ 
async def compute_metrics( if entity_results: entity_metrics = _compute_entity_metrics(entity_results) - response_selection_metrics = {} + response_selection_metrics: ResponseSelectionMetrics = {} if response_selection_results: response_selection_metrics = _compute_metrics( response_selection_results, @@ -1762,7 +1762,7 @@ async def compare_nlu( configs: List[Text], data: TrainingData, exclusion_percentages: List[int], - f_score_results: Dict[Text, Any], + f_score_results: Dict[Text, List[List[float]]], model_names: List[Text], output: Text, runs: int, diff --git a/rasa/nlu/utils/hugging_face/registry.py b/rasa/nlu/utils/hugging_face/registry.py index 002269c02226..f7f6cc4c4d02 100644 --- a/rasa/nlu/utils/hugging_face/registry.py +++ b/rasa/nlu/utils/hugging_face/registry.py @@ -1,10 +1,12 @@ import logging +from typing import Dict, Text, Type # Explicitly set logging level for this module before any import # because otherwise it logs tensorflow/pytorch versions logging.getLogger("transformers.file_utils").setLevel(logging.WARNING) from transformers import ( # noqa: F401, E402 + TFPreTrainedModel, TFBertModel, TFOpenAIGPTModel, TFGPT2Model, @@ -12,6 +14,7 @@ # TFXLMModel, TFDistilBertModel, TFRobertaModel, + PreTrainedTokenizer, BertTokenizer, OpenAIGPTTokenizer, GPT2Tokenizer, @@ -36,7 +39,7 @@ ) -model_class_dict = { +model_class_dict: Dict[Text, Type[TFPreTrainedModel]] = { "bert": TFBertModel, "gpt": TFOpenAIGPTModel, "gpt2": TFGPT2Model, @@ -46,7 +49,7 @@ "distilbert": TFDistilBertModel, "roberta": TFRobertaModel, } -model_tokenizer_dict = { +model_tokenizer_dict: Dict[Text, Type[PreTrainedTokenizer]] = { "bert": BertTokenizer, "gpt": OpenAIGPTTokenizer, "gpt2": GPT2Tokenizer, diff --git a/rasa/server.py b/rasa/server.py index 72d64cb9382d..08efffbe55f4 100644 --- a/rasa/server.py +++ b/rasa/server.py @@ -1236,7 +1236,7 @@ def _get_evaluation_results( "response_selection_evaluation": response_selector_report, } - result = defaultdict(dict) + result: defaultdict[Text, Any] = defaultdict(dict) for evaluation_name, evaluation in eval_name_mapping.items(): report = evaluation.evaluation.get("report", {}) averages = report.get("weighted avg", {}) @@ -1364,7 +1364,10 @@ async def unload_model(request: Request) -> HTTPResponse: @ensure_loaded_agent(app) async def get_domain(request: Request) -> HTTPResponse: """Get current domain in yaml or json format.""" - accepts = request.headers.get("Accept", default=JSON_CONTENT_TYPE) + # FIXME: this is a false positive mypy error after upgrading to 0.931 + accepts = request.headers.get( # type: ignore[call-overload] + "Accept", default=JSON_CONTENT_TYPE + ) if accepts.endswith("json"): domain = app.ctx.agent.domain.as_dict() return response.json(domain) diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py index 0b00c1f1ef88..e77a4b8c95e8 100644 --- a/rasa/shared/constants.py +++ b/rasa/shared/constants.py @@ -1,3 +1,6 @@ +from typing import List, Text + + DOCS_BASE_URL = "https://rasa.com/docs/rasa" LEGACY_DOCS_BASE_URL = "https://legacy-docs-v1.rasa.com" DOCS_URL_TRAINING_DATA = DOCS_BASE_URL + "/training-data-format" @@ -68,7 +71,7 @@ CONFIG_KEYS_CORE = ["policies"] CONFIG_KEYS_NLU = ["language", "pipeline"] CONFIG_KEYS = CONFIG_KEYS_CORE + CONFIG_KEYS_NLU -CONFIG_MANDATORY_KEYS_CORE = [] +CONFIG_MANDATORY_KEYS_CORE: List[Text] = [] CONFIG_MANDATORY_KEYS_NLU = ["language"] CONFIG_MANDATORY_KEYS = CONFIG_MANDATORY_KEYS_CORE + CONFIG_MANDATORY_KEYS_NLU diff --git a/rasa/shared/core/conversation.py b/rasa/shared/core/conversation.py index 
807464190a75..e2e4e2cb8c60 100644 --- a/rasa/shared/core/conversation.py +++ b/rasa/shared/core/conversation.py @@ -34,8 +34,7 @@ def from_parameters(cls, parameters: Dict[Text, Any]) -> "Dialogue": Deserialised `Dialogue`. """ - return cls( parameters.get("name"), - [Event.from_parameters(evt) for evt in parameters.get("events")], + [Event.from_parameters(evt) for evt in parameters.get("events", [])], ) diff --git a/rasa/shared/core/domain.py b/rasa/shared/core/domain.py index 27ed4f99ade9..49bba476fe29 100644 --- a/rasa/shared/core/domain.py +++ b/rasa/shared/core/domain.py @@ -8,7 +8,6 @@ Any, Dict, List, - NamedTuple, NoReturn, Optional, Set, @@ -17,8 +16,12 @@ Union, TYPE_CHECKING, Iterable, + NamedTuple, + Callable, ) +from ruamel.yaml.scalarstring import DoubleQuotedScalarString + from rasa.shared.constants import ( DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION, @@ -29,10 +32,15 @@ DOCS_URL_RESPONSES, REQUIRED_SLOTS_KEY, IGNORED_INTENTS, + RESPONSE_CONDITION, ) import rasa.shared.core.constants from rasa.shared.core.constants import SlotMappingType, MAPPING_TYPE, MAPPING_CONDITIONS -from rasa.shared.exceptions import RasaException, YamlException, YamlSyntaxException +from rasa.shared.exceptions import ( + RasaException, + YamlException, + YamlSyntaxException, +) import rasa.shared.utils.validation import rasa.shared.utils.io import rasa.shared.utils.common @@ -40,7 +48,6 @@ from rasa.shared.core.events import SlotSet, UserUttered from rasa.shared.core.slots import Slot, CategoricalSlot, TextSlot, AnySlot, ListSlot from rasa.shared.utils.validation import KEY_TRAINING_DATA_FORMAT_VERSION -from rasa.shared.constants import RESPONSE_CONDITION from rasa.shared.nlu.constants import ( ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, @@ -121,19 +128,29 @@ def are_sessions_enabled(self) -> bool: """Returns a boolean value depending on the value of session_expiration_time.""" return self.session_expiration_time > 0 + def as_dict(self) -> Dict: + """Return serialized `SessionConfig`.""" + return { + "session_expiration_time": self.session_expiration_time, + "carry_over_slots_to_new_session": self.carry_over_slots, + } + class Domain: """The domain specifies the universe in which the bot's policy acts. A Domain subclass provides the actions the bot can take, the intents - and entities it can recognise.""" + and entities it can recognise. + """ @classmethod def empty(cls) -> "Domain": - return cls([], [], [], {}, [], {}) + """Returns empty Domain.""" + return Domain.from_dict({}) @classmethod def load(cls, paths: Union[List[Union[Path, Text]], Text, Path]) -> "Domain": + """Returns loaded Domain after merging all domain files.""" if not paths: raise InvalidDomain( "No domain file was specified. Please specify a path " @@ -151,6 +168,7 @@ def load(cls, paths: Union[List[Union[Path, Text]], Text, Path]) -> "Domain": @classmethod def from_path(cls, path: Union[Text, Path]) -> "Domain": + """Loads the `Domain` from a path.""" path = os.path.abspath(path) if os.path.isfile(path): @@ -192,17 +210,19 @@ def from_dict(cls, data: Dict) -> "Domain": Args: data: The serialized domain. - duplicates: A dictionary where keys are `intents`, `slots`, `forms` and - `responses` and values are lists of duplicated entries of a - corresponding type when the domain is built from multiple files. Returns: The instantiated `Domain` object. 
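In the hunk that follows, `from_dict` pops the `duplicates` bookkeeping produced by `merge_domain_dicts` out of the incoming dict before warning about it, so the marker never leaks into the data the `Domain` retains. A rough sketch of that flow, with a stand-in `warn` in place of `rasa.shared.utils.io.raise_warning`:

from typing import Any, Dict, List, Optional

def warn(message: str) -> None:
    print(f"UserWarning: {message}")

def from_dict_sketch(data: Dict[str, Any]) -> Dict[str, Any]:
    # pop, not get: the bookkeeping must not end up in the dict
    # that the Domain keeps around afterwards
    duplicates: Optional[Dict[str, List[str]]] = data.pop("duplicates", None)
    if duplicates:
        warn(f"Duplicates found while merging domain files: {duplicates}")
    return data

cleaned = from_dict_sketch({"intents": ["greet"], "duplicates": {"intents": ["greet"]}})
assert "duplicates" not in cleaned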
""" + duplicates = data.pop("duplicates", None) + if duplicates: + warn_about_duplicates_found_during_domain_merging(duplicates) + responses = data.get(KEY_RESPONSES, {}) domain_slots = data.get(KEY_SLOTS, {}) - rasa.shared.core.slot_mappings.validate_slot_mappings(domain_slots) + if domain_slots: + rasa.shared.core.slot_mappings.validate_slot_mappings(domain_slots) slots = cls.collect_slots(domain_slots) additional_arguments = data.get("config", {}) @@ -211,7 +231,6 @@ def from_dict(cls, data: Dict) -> "Domain": forms = data.get(KEY_FORMS, {}) _validate_forms(forms) - duplicates = data.get("duplicates", None) return cls( intents=intents, @@ -220,9 +239,9 @@ def from_dict(cls, data: Dict) -> "Domain": responses=responses, action_names=data.get(KEY_ACTIONS, []), forms=data.get(KEY_FORMS, {}), + data=Domain._cleaned_data(data), action_texts=data.get(KEY_E2E_ACTIONS, []), session_config=session_config, - duplicates=duplicates, **additional_arguments, ) @@ -242,7 +261,7 @@ def _get_session_config(session_config: Dict) -> SessionConfig: @classmethod def from_directory(cls, path: Text) -> "Domain": """Loads and merges multiple domain files recursively from a directory tree.""" - domain_dict = {} + domain_dict: Dict[Text, Any] = {} for root, _, files in os.walk(path, followlinks=True): for file in files: full_path = os.path.join(root, file) @@ -251,18 +270,25 @@ def from_directory(cls, path: Text) -> "Domain": other_dict = rasa.shared.utils.io.read_yaml( rasa.shared.utils.io.read_file(full_path) ) - domain_dict = Domain.merge_domain_dicts( - cls, domain_dict, other_dict - ) + domain_dict = Domain.merge_domain_dicts(other_dict, domain_dict) + domain = Domain.from_dict(domain_dict) return domain - def merge(self, domain: Optional["Domain"], override: bool = False) -> "Domain": - """Merge this domain with another one, combining their attributes. + def merge( + self, + domain: Optional["Domain"], + override: bool = False, + ) -> "Domain": + """Merges this domain dict with another one, combining their attributes. + + This method merges domain dicts, and ensures all attributes (like ``intents``, + ``entities``, and ``actions``) are known to the Domain when the + object is created. - List attributes like ``intents`` and ``actions`` will be deduped - and merged. Single attributes will be taken from `self` unless - override is `True`, in which case they are taken from `domain`. + List attributes like ``intents`` and ``actions`` are deduped + and merged. Single attributes are taken from `domain1` unless + override is `True`, in which case they are taken from `domain2`. 
""" if not domain or domain.is_empty(): return self @@ -270,177 +296,153 @@ def merge(self, domain: Optional["Domain"], override: bool = False) -> "Domain": if self.is_empty(): return domain - domain_dict = domain.as_dict() - combined = self.as_dict() - - if override: - config = domain_dict["config"] - for key, val in config.items(): - combined["config"][key] = val - - if override or self.session_config == SessionConfig.default(): - combined[SESSION_CONFIG_KEY] = domain_dict[SESSION_CONFIG_KEY] - - for key in [KEY_INTENTS, KEY_ENTITIES]: - if combined[key] or domain_dict[key]: - combined[key] = self.merge_lists_of_dicts( - combined[key], domain_dict[key], override - ) - # remove existing forms from new actions - for form in combined[KEY_FORMS]: - if form in domain_dict[KEY_ACTIONS]: - domain_dict[KEY_ACTIONS].remove(form) - - for key in [KEY_ACTIONS, KEY_E2E_ACTIONS]: - combined[key] = self.merge_lists(combined[key], domain_dict[key]) - - for key in [KEY_FORMS, KEY_RESPONSES, KEY_SLOTS]: - combined[key] = self.merge_dicts(combined[key], domain_dict[key], override) + merged_dict = self.__class__.merge_domain_dicts( + domain.as_dict(), self.as_dict(), override + ) - return self.__class__.from_dict(combined) + return Domain.from_dict(merged_dict) + @staticmethod def merge_domain_dicts( - self, domain1: Dict, domain2: Dict, override: bool = False - ) -> Dict[Text, Any]: - """Merges this domain dict with another one, combining their attributes. + domain_dict: Dict, + combined: Dict, + override: bool = False, + ) -> Dict: + """Combines two domain dictionaries.""" + if not domain_dict: + return combined - This is used when multiple domain yml files are configured in a single - directory. Unlike the merge method above, which merges Domain objects by - creating each object then merging it with the previous, this method merges - domain dicts, and ensures all attributes (like ``intents``, ``entities``, and - ``actions``) are known to the Domain when the object is created. - - List attributes like ``intents`` and ``actions`` are deduped - and merged. Single attributes are taken from `domain1` unless - override is `True`, in which case they are taken from `domain2`. 
- """ - if not domain2: - return domain1 - - if not domain1: - return domain2 - - domain_dict = domain2 - combined = domain1 + if not combined: + return domain_dict if override: - config = domain_dict["config"] + config = domain_dict.get("config", {}) for key, val in config.items(): combined["config"][key] = val - if override or domain2.get("session_config"): + if ( + override + or combined.get(SESSION_CONFIG_KEY) == SessionConfig.default().as_dict() + or combined.get(SESSION_CONFIG_KEY) is None + ) and domain_dict.get(SESSION_CONFIG_KEY): combined[SESSION_CONFIG_KEY] = domain_dict[SESSION_CONFIG_KEY] - duplicates: Dict[Text, List[Text]] = {} - - for key in [KEY_INTENTS, KEY_ENTITIES]: - if combined.get(key) or domain_dict.get(key): - duplicates[key] = self.extract_duplicates( - combined.get(key, []), domain_dict.get(key, []) - ) - combined[key] = combined.get(key, []) - domain_dict[key] = domain_dict.get(key, []) - combined[key] = self.merge_lists_of_dicts( - combined[key], domain_dict[key], override - ) - # remove existing forms from new actions for form in combined.get(KEY_FORMS, []): if form in domain_dict.get(KEY_ACTIONS, []): domain_dict[KEY_ACTIONS].remove(form) - for key in [KEY_ACTIONS, KEY_E2E_ACTIONS]: - duplicates[key] = self.extract_duplicates( - combined.get(key, []), domain_dict.get(key, []) - ) - combined[key] = self.merge_lists( - combined.get(key, []), domain_dict.get(key, []) - ) + duplicates: Dict[Text, List[Text]] = {} + + merge_func_mappings: Dict[Text, Callable[..., Any]] = { + KEY_INTENTS: rasa.shared.utils.common.merge_lists_of_dicts, + KEY_ENTITIES: rasa.shared.utils.common.merge_lists_of_dicts, + KEY_ACTIONS: rasa.shared.utils.common.merge_lists, + KEY_E2E_ACTIONS: rasa.shared.utils.common.merge_lists, + KEY_FORMS: rasa.shared.utils.common.merge_dicts, + KEY_RESPONSES: rasa.shared.utils.common.merge_dicts, + KEY_SLOTS: rasa.shared.utils.common.merge_dicts, + } - for key in [KEY_FORMS, KEY_RESPONSES, KEY_SLOTS]: - duplicates[key] = self.extract_duplicates( + for key, merge_func in merge_func_mappings.items(): + duplicates[key] = rasa.shared.utils.common.extract_duplicates( combined.get(key, []), domain_dict.get(key, []) ) - combined[key] = self.merge_dicts( - combined.get(key, {}), domain_dict.get(key, {}), override + + if merge_func == rasa.shared.utils.common.merge_dicts: + default: Dict[Text, Any] = {} + else: + default = [] + + combined[key] = merge_func( + combined.get(key, default), domain_dict.get(key, default), override ) if duplicates: - duplicates = self.clean_duplicates(duplicates) + duplicates = rasa.shared.utils.common.clean_duplicates(duplicates) combined.update({"duplicates": duplicates}) - return combined - @staticmethod - def extract_duplicates(list1: List[Any], list2: List[Any]) -> List[Any]: - """Extracts duplicates from two lists.""" - if list1: - dict1 = { - (sorted(list(i.keys()))[0] if isinstance(i, dict) else i): i - for i in list1 - } - else: - dict1 = {} + return combined - if list2: - dict2 = { - (sorted(list(i.keys()))[0] if isinstance(i, dict) else i): i - for i in list2 - } - else: - dict2 = {} + def _preprocess_domain_dict( + self, + data: Dict, + store_entities_as_slots: bool, + session_config: SessionConfig, + ) -> Dict: + data = self._add_default_keys_to_domain_dict( + data, + store_entities_as_slots, + session_config, + ) + data = self._sanitize_intents_in_domain_dict(data) - set1 = set(dict1.keys()) - set2 = set(dict2.keys()) - dupes = set1.intersection(set2) - return sorted(list(dupes)) + return data @staticmethod - def 
clean_duplicates(dupes: Dict[Text, Any]) -> Dict[Text, Any]: - """Removes keys for empty values.""" - duplicates = dupes.copy() - for k in dupes: - if not dupes[k]: - duplicates.pop(k) + def _add_default_keys_to_domain_dict( + data: Dict, + store_entities_as_slots: bool, + session_config: SessionConfig, + ) -> Dict: + # add the config, session_config and training data version defaults + # if not included in the original domain dict + if "config" not in data and not store_entities_as_slots: + data.update( + {"config": {"store_entities_as_slots": store_entities_as_slots}} + ) - return duplicates + if SESSION_CONFIG_KEY not in data: + data.update( + { + SESSION_CONFIG_KEY: { + SESSION_EXPIRATION_TIME_KEY: ( + session_config.session_expiration_time + ), + CARRY_OVER_SLOTS_KEY: session_config.carry_over_slots, + } + } + ) - @staticmethod - def merge_dicts( - tempDict1: Dict[Text, Any], - tempDict2: Dict[Text, Any], - override_existing_values: bool = False, - ) -> Dict[Text, Any]: - """Merges two dicts.""" - if override_existing_values: - merged_dicts, b = tempDict1.copy(), tempDict2.copy() + if KEY_TRAINING_DATA_FORMAT_VERSION not in data: + data.update( + { + KEY_TRAINING_DATA_FORMAT_VERSION: DoubleQuotedScalarString( + LATEST_TRAINING_DATA_FORMAT_VERSION + ) + } + ) - else: - merged_dicts, b = tempDict2.copy(), tempDict1.copy() - merged_dicts.update(b) - return merged_dicts + return data @staticmethod - def merge_lists(list1: List[Any], list2: List[Any]) -> List[Any]: - """Merges two lists.""" - return sorted(list(set(list1 + list2))) + def _reset_intent_flags(intent: Dict[Text, Any]) -> None: + for intent_property in intent.values(): + if ( + USE_ENTITIES_KEY in intent_property.keys() + and not intent_property[USE_ENTITIES_KEY] + ): + intent_property[USE_ENTITIES_KEY] = [] + if ( + IGNORE_ENTITIES_KEY in intent_property.keys() + and not intent_property[IGNORE_ENTITIES_KEY] + ): + intent_property[IGNORE_ENTITIES_KEY] = [] @staticmethod - def merge_lists_of_dicts( - dict_list1: List[Dict], - dict_list2: List[Dict], - override_existing_values: bool = False, - ) -> List[Dict]: - """Merges two dict lists.""" - dict1 = { - (sorted(list(i.keys()))[0] if isinstance(i, dict) else i): i - for i in dict_list1 - } - dict2 = { - (sorted(list(i.keys()))[0] if isinstance(i, dict) else i): i - for i in dict_list2 - } - merged_dicts = Domain.merge_dicts(dict1, dict2, override_existing_values) - return list(merged_dicts.values()) + def _sanitize_intents_in_domain_dict(data: Dict[Text, Any]) -> Dict[Text, Any]: + if not data.get(KEY_INTENTS): + return data + + for intent in data.get(KEY_INTENTS, []): + if isinstance(intent, dict): + Domain._reset_intent_flags(intent) + + data[KEY_INTENTS] = Domain._sort_intent_names_alphabetical_order( + intents=data.get(KEY_INTENTS) + ) + + return data @staticmethod def collect_slots(slot_dict: Dict[Text, Any]) -> List[Slot]: @@ -629,7 +631,7 @@ def collect_intent_properties( """ # make a copy to not alter the input argument intents = copy.deepcopy(intents) - intent_properties = {} + intent_properties: Dict[Text, Any] = {} duplicates = set() for intent in intents: @@ -697,10 +699,10 @@ def __init__( responses: Dict[Text, List[Dict[Text, Any]]], action_names: List[Text], forms: Union[Dict[Text, Any], List[Text]], + data: Dict, action_texts: Optional[List[Text]] = None, store_entities_as_slots: bool = True, session_config: SessionConfig = SessionConfig.default(), - duplicates: Optional[Dict[Text, List[Text]]] = None, ) -> None: """Creates a `Domain`. 
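A rough sketch of what the default-key preprocessing above does to a bare domain dict. The concrete values here (60-minute expiration, carried-over slots, the version string) are illustrative assumptions; the real method takes them from the passed `SessionConfig` and wraps the version in a double-quoted YAML scalar:

def add_default_keys_sketch(data: dict) -> dict:
    # assumed defaults, standing in for SessionConfig and the
    # training-data format version constant
    data.setdefault(
        "session_config",
        {"session_expiration_time": 60, "carry_over_slots_to_new_session": True},
    )
    data.setdefault("version", "3.1")
    return data

print(add_default_keys_sketch({"intents": ["greet"]}))
# {'intents': ['greet'], 'session_config': {...}, 'version': '3.1'}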
@@ -712,14 +714,12 @@ def __init__( will send the matching response to the user. action_names: Names of custom actions. forms: Form names and their slot mappings. + data: original domain dict representation. action_texts: End-to-End bot utterances from end-to-end stories. store_entities_as_slots: If `True` Rasa will automatically create `SlotSet` events for entities if there are slots with the same name as the entity. session_config: Configuration for conversation sessions. Conversations are restarted at the end of a session. - duplicates: A dictionary where keys are `intents`, `slots`, `forms` and - `responses` and values are lists of duplicated entries of a - corresponding type when the domain is built from multiple files. """ self.entities, self.roles, self.groups = self.collect_entity_properties( entities @@ -738,9 +738,16 @@ def __init__( self.responses = responses - self.action_texts = action_texts or [] + self.action_texts = action_texts if action_texts is not None else [] + + data_copy = copy.deepcopy(data) + self._data = self._preprocess_domain_dict( + data_copy, + store_entities_as_slots, + session_config, + ) + self.session_config = session_config - self.duplicates = duplicates self._custom_actions = action_names @@ -844,14 +851,30 @@ def fingerprint(self) -> Text: fingerprint of the domain """ self_as_dict = self.as_dict() - self_as_dict[ - KEY_INTENTS - ] = rasa.shared.utils.common.sort_list_of_dicts_by_first_key( - self_as_dict[KEY_INTENTS] - ) + transformed_intents: List[Text] = [] + for intent in self_as_dict.get(KEY_INTENTS, []): + if isinstance(intent, dict): + transformed_intents.append(*intent.keys()) + elif isinstance(intent, str): + transformed_intents.append(intent) + + self_as_dict[KEY_INTENTS] = sorted(transformed_intents) self_as_dict[KEY_ACTIONS] = self.action_names_or_texts return rasa.shared.utils.io.get_dictionary_fingerprint(self_as_dict) + @staticmethod + def _sort_intent_names_alphabetical_order( + intents: List[Union[Text, Dict]] + ) -> List[Union[Text, Dict]]: + def sort(elem: Union[Text, Dict]) -> Union[Text, Dict]: + if isinstance(elem, dict): + return list(elem.keys())[0] + elif isinstance(elem, str): + return elem + + sorted_intents = sorted(intents, key=sort) + return sorted_intents + @rasa.shared.utils.common.lazy_property def user_actions_and_forms(self) -> List[Text]: """Returns combination of user actions and forms.""" @@ -1295,7 +1318,7 @@ def states_for_tracker_history( Return: A list of states. """ - states = [] + states: List[State] = [] last_ml_action_sub_state = None turn_was_hidden = False for tr, hide_rule_turn in tracker.generate_all_prior_trackers(): @@ -1415,29 +1438,9 @@ def compare_with_specification(self, path: Text) -> bool: else: return True - def _slot_definitions(self) -> Dict[Any, Dict[str, Any]]: - # Only persist slots defined by the user. We add the default slots on the - # fly when loading the domain. 
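`_sort_intent_names_alphabetical_order` above orders a mixed list of plain intent names and single-key dicts. A tightened sketch whose key function always returns a string, which also sidesteps the missing-return corner case the inner `sort` helper leaves open for other element types:

from typing import Dict, List, Text, Union

def sort_intents(intents: List[Union[Text, Dict]]) -> List[Union[Text, Dict]]:
    def key(elem: Union[Text, Dict]) -> Text:
        # dict entries such as {"affirm": {...}} sort by their single key
        return next(iter(elem)) if isinstance(elem, dict) else elem
    return sorted(intents, key=key)

print(sort_intents(["goodbye", {"affirm": {"use_entities": True}}, "bot_challenge"]))
# [{'affirm': {'use_entities': True}}, 'bot_challenge', 'goodbye']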
- return {slot.name: slot.persistence_info() for slot in self._user_slots} - def as_dict(self) -> Dict[Text, Any]: """Return serialized `Domain`.""" - return { - "config": {"store_entities_as_slots": self.store_entities_as_slots}, - SESSION_CONFIG_KEY: { - SESSION_EXPIRATION_TIME_KEY: ( - self.session_config.session_expiration_time - ), - CARRY_OVER_SLOTS_KEY: self.session_config.carry_over_slots, - }, - KEY_INTENTS: self._transform_intents_for_file(), - KEY_ENTITIES: self._transform_entities_for_file(), - KEY_SLOTS: self._slot_definitions(), - KEY_RESPONSES: self.responses, - KEY_ACTIONS: self._custom_actions, - KEY_FORMS: self.forms, - KEY_E2E_ACTIONS: self.action_texts, - } + return self._data @staticmethod def get_responses_with_multilines( @@ -1466,141 +1469,29 @@ def get_responses_with_multilines( return final_responses - def _transform_intents_for_file( - self, - ) -> List[Dict[Text, Dict[Text, Union[bool, List[Text]]]]]: - """Transform intent properties for displaying or writing into a domain file. - - Internally, there is a property `used_entities` that lists all entities to be - used. In domain files, `use_entities` or `ignore_entities` is used instead to - list individual entities to ex- or include, because this is easier to read. - - Returns: - The intent properties as they are used in domain files. - """ - intent_properties = copy.deepcopy(self.intent_properties) - sorted_intent_properties = sorted(intent_properties.items()) - intents_for_file = [] - - for intent_name, intent_props in sorted_intent_properties: - if ( - intent_name in rasa.shared.core.constants.DEFAULT_INTENTS - and intent_name not in self.overridden_default_intents - ): - # Default intents should be not dumped with the domain - continue - # `use_entities` and `ignore_entities` in the domain file do not consider - # the role and group labels remove them from the list to make sure to not - # put them into the domain file - use_entities = set( - entity - for entity in intent_props[USED_ENTITIES_KEY] - if rasa.shared.core.constants.ENTITY_LABEL_SEPARATOR not in entity - ) - ignore_entities = set(self.entities) - use_entities - if len(use_entities) == len(self.entities): - intent_props[USE_ENTITIES_KEY] = True - elif len(use_entities) <= len(self.entities) / 2: - entities = list(use_entities) - entities.sort() - intent_props[USE_ENTITIES_KEY] = entities - else: - entities = list(ignore_entities) - entities.sort() - intent_props[IGNORE_ENTITIES_KEY] = entities - intent_props.pop(USED_ENTITIES_KEY) - intents_for_file.append({intent_name: intent_props}) - - return intents_for_file - - def _transform_entities_for_file(self) -> List[Union[Text, Dict[Text, Any]]]: - """Transform entity properties for displaying or writing to a domain file. - - Returns: - The entity properties as they are used in domain files. - """ - entities_for_file: List[Union[Text, Dict[Text, Any]]] = [] - - for entity in self.entities: - if entity in self.roles and entity in self.groups: - entities_for_file.append( - { - entity: { - ENTITY_GROUPS_KEY: self.groups[entity], - ENTITY_ROLES_KEY: self.roles[entity], - } - } - ) - elif entity in self.roles: - entities_for_file.append( - {entity: {ENTITY_ROLES_KEY: self.roles[entity]}} - ) - elif entity in self.groups: - entities_for_file.append( - {entity: {ENTITY_GROUPS_KEY: self.groups[entity]}} - ) - else: - entities_for_file.append(entity) - - return entities_for_file - - def cleaned_domain(self) -> Dict[Text, Any]: - """Fetch cleaned domain to display or write into a file. 
- - The internal `used_entities` property is replaced by `use_entities` or - `ignore_entities` and redundant keys are replaced with default values - to make the domain easier readable. + @staticmethod + def _cleaned_data(data: Dict[Text, Any]) -> Dict[Text, Any]: + """Remove empty and redundant keys from merged domain dict. Returns: A cleaned dictionary version of the domain. """ - domain_data = self.as_dict() - # remove e2e actions from domain before we display it - domain_data.pop(KEY_E2E_ACTIONS, None) - - for idx, intent_info in enumerate(domain_data[KEY_INTENTS]): - for name, intent in intent_info.items(): - if intent.get(USE_ENTITIES_KEY) is True: - del intent[USE_ENTITIES_KEY] - if not intent.get(IGNORE_ENTITIES_KEY): - intent.pop(IGNORE_ENTITIES_KEY, None) - if len(intent) == 0: - domain_data[KEY_INTENTS][idx] = name - - for slot in domain_data[KEY_SLOTS].values(): - if slot["initial_value"] is None: - del slot["initial_value"] - if slot["type"].startswith("rasa.shared.core.slots"): - slot["type"] = Slot.resolve_by_type(slot["type"]).type_name - - if domain_data["config"]["store_entities_as_slots"]: - del domain_data["config"]["store_entities_as_slots"] - - # clean empty keys return { key: val - for key, val in domain_data.items() + for key, val in data.items() if val != {} and val != [] and val is not None } def persist(self, filename: Union[Text, Path]) -> None: """Write domain to a file.""" - as_yaml = self.as_yaml(clean_before_dump=False) + as_yaml = self.as_yaml() rasa.shared.utils.io.write_text_file(as_yaml, filename) - def persist_clean(self, filename: Union[Text, Path]) -> None: - """Write cleaned domain to a file.""" - as_yaml = self.as_yaml(clean_before_dump=True) - rasa.shared.utils.io.write_text_file(as_yaml, filename) - - def as_yaml(self, clean_before_dump: bool = False) -> Text: + def as_yaml(self) -> Text: """Dump the `Domain` object as a YAML string. + This function preserves the orders of the keys in the domain. - Args: - clean_before_dump: When set to `True`, this method returns - a version of the domain without internal - information. Defaults to `False`. Returns: A string in YAML format representing the domain. """ @@ -1608,12 +1499,13 @@ def as_yaml(self, clean_before_dump: bool = False) -> Text: # thanks to the `should_preserve_key_order` argument # of `dump_obj_as_yaml_to_string` domain_data: Dict[Text, Any] = { - KEY_TRAINING_DATA_FORMAT_VERSION: LATEST_TRAINING_DATA_FORMAT_VERSION + KEY_TRAINING_DATA_FORMAT_VERSION: DoubleQuotedScalarString( + LATEST_TRAINING_DATA_FORMAT_VERSION + ) } - if clean_before_dump: - domain_data.update(self.cleaned_domain()) - else: - domain_data.update(self.as_dict()) + + domain_data.update(self.as_dict()) + if domain_data.get(KEY_RESPONSES, {}): domain_data[KEY_RESPONSES] = self.get_responses_with_multilines( domain_data[KEY_RESPONSES] @@ -1638,7 +1530,6 @@ def _slots_for_domain_warnings(self) -> List[Text]: Excludes slots which aren't featurized. """ - return [slot.name for slot in self._user_slots if slot.influence_conversation] @property @@ -1647,7 +1538,6 @@ def _actions_for_domain_warnings(self) -> List[Text]: Includes user and form actions, but excludes those that are default actions. 
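The `_cleaned_data` comprehension above drops only empty containers and `None`; a bare `if val` filter would also discard legitimate falsy values such as `False`. A quick standalone check of that behaviour:

def cleaned(data: dict) -> dict:
    return {
        key: val
        for key, val in data.items()
        if val != {} and val != [] and val is not None
    }

sample = {
    "forms": {},
    "e2e_actions": [],
    "config": None,
    "store_entities_as_slots": False,
    "intents": ["greet"],
}
assert cleaned(sample) == {"store_entities_as_slots": False, "intents": ["greet"]}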
""" - return [ action for action in self.user_actions_and_forms @@ -1659,15 +1549,16 @@ def _get_symmetric_difference( domain_elements: Union[List[Text], Set[Text]], training_data_elements: Optional[Union[List[Text], Set[Text]]], ) -> Dict[Text, Set[Text]]: - """Get symmetric difference between a set of domain elements and a set of - training data elements. + """Gets the symmetric difference between two sets. + + One set represents domain elements and the other one is a set of training + data elements. Returns a dictionary containing a list of items found in the `domain_elements` but not in `training_data_elements` at key `in_domain`, and a list of items found in `training_data_elements` but not in `domain_elements` at key `in_training_data_set`. """ - if training_data_elements is None: training_data_elements = set() @@ -1944,6 +1835,35 @@ def __repr__(self) -> Text: ) +def warn_about_duplicates_found_during_domain_merging( + duplicates: Dict[Text, List[Text]] +) -> None: + """Emits warning about found duplicates while loading multiple domain paths.""" + message = "" + for key in [ + KEY_INTENTS, + KEY_FORMS, + KEY_ACTIONS, + KEY_E2E_ACTIONS, + KEY_RESPONSES, + KEY_SLOTS, + KEY_ENTITIES, + ]: + duplicates_per_key = duplicates.get(key) + if duplicates_per_key: + if message: + message += " \n" + + duplicates_per_key_str = ", ".join(duplicates_per_key) + message += ( + f"The following duplicated {key} have been found " + f"across multiple domain files: {duplicates_per_key_str}" + ) + + rasa.shared.utils.io.raise_warning(message, docs=DOCS_URL_DOMAINS) + return None + + def _validate_forms(forms: Union[Dict, List]) -> None: if not isinstance(forms, dict): raise InvalidDomain("Forms have to be specified as dictionary.") diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py index 92fbf266b0bb..8164f673281e 100644 --- a/rasa/shared/core/events.py +++ b/rasa/shared/core/events.py @@ -182,7 +182,7 @@ def split_events( The split events. 
""" sub_events = [] - current = [] + current: List["Event"] = [] def event_fulfills_splitting_condition(evt: "Event") -> bool: # event does not have the correct type @@ -735,6 +735,9 @@ def apply_to(self, tracker: "DialogueStateTracker") -> None: # a user message is always followed by action listen return + if not tracker.latest_message: + return + # update previous user message's featurization based on this event tracker.latest_message.use_text_for_featurization = ( self.use_text_for_featurization @@ -815,6 +818,9 @@ def apply_to(self, tracker: "DialogueStateTracker") -> None: # a user message always comes after action listen return + if not tracker.latest_message: + return + for entity in self.entities: if entity not in tracker.latest_message.entities: tracker.latest_message.entities.append(entity) diff --git a/rasa/shared/core/generator.py b/rasa/shared/core/generator.py index 3b6d0c4c02a1..a4b17ee31581 100644 --- a/rasa/shared/core/generator.py +++ b/rasa/shared/core/generator.py @@ -61,8 +61,8 @@ def __init__( super().__init__( sender_id, slots, max_event_history, is_rule_tracker=is_rule_tracker ) - self._states_for_hashing = None - self.domain = domain + self._states_for_hashing: Deque[FrozenState] = deque() + self.domain = domain if domain is not None else Domain.empty() # T/F property to filter augmented stories self.is_augmented = is_augmented @@ -104,7 +104,7 @@ def past_states_for_hashing( # if don't have it cached, we use the domain to calculate the states # from the events states_for_hashing = self._states_for_hashing - if states_for_hashing is None: + if not states_for_hashing: states = super().past_states(domain, omit_unset_slots=omit_unset_slots) states_for_hashing = deque(self.freeze_current_state(s) for s in states) @@ -146,7 +146,7 @@ def past_states( def clear_states(self) -> None: """Reset the states.""" - self._states_for_hashing = None + self._states_for_hashing = deque() def init_copy(self) -> "TrackerWithCachedStates": """Create a new state tracker with the same initial values.""" @@ -193,8 +193,7 @@ def update(self, event: Event, skip_states: bool = False) -> None: """Modify the state of the tracker according to an ``Event``.""" # if `skip_states` is `True`, this function behaves exactly like the # normal update of the `DialogueStateTracker` - - if self._states_for_hashing is None and not skip_states: + if not self._states_for_hashing and not skip_states: # rest of this function assumes we have the previous state # cached. let's make sure it is there. self._states_for_hashing = self.past_states_for_hashing(self.domain) @@ -263,7 +262,7 @@ def __init__( rand=random.Random(42), ) # hashed featurization of all finished trackers - self.hashed_featurizations = set() + self.hashed_featurizations: Set[int] = set() @staticmethod def _phase_name(everything_reachable_is_reached: bool, phase: int) -> Text: @@ -344,8 +343,8 @@ def _generate( min_num_aug_phases = 0 # placeholder to track gluing process of checkpoints - used_checkpoints = set() - previous_unused = set() + used_checkpoints: Set[Text] = set() + previous_unused: Set[Text] = set() everything_reachable_is_reached = False # we will continue generating data until we have reached all diff --git a/rasa/shared/core/slot_mappings.py b/rasa/shared/core/slot_mappings.py index 18eb65c6e3bf..6e31bb26fa76 100644 --- a/rasa/shared/core/slot_mappings.py +++ b/rasa/shared/core/slot_mappings.py @@ -54,7 +54,7 @@ def validate(mapping: Dict[Text, Any], slot_name: Text) -> None: f"{DOCS_URL_SLOTS} for more information." 
        )

-        validations = {
+        validations: Dict[SlotMappingType, List[Text]] = {
            SlotMappingType.FROM_ENTITY: ["entity"],
            SlotMappingType.FROM_INTENT: ["value"],
            SlotMappingType.FROM_TRIGGER_INTENT: ["value"],
@@ -62,7 +62,7 @@ def validate(mapping: Dict[Text, Any], slot_name: Text) -> None:
            SlotMappingType.CUSTOM: [],
        }

-        required_keys = validations.get(mapping_type)
+        required_keys = validations[mapping_type]
        for required_key in required_keys:
            if mapping.get(required_key) is None:
                raise InvalidDomain(
@@ -114,7 +114,10 @@ def intent_is_desired(
            )
        )

-        intent = tracker.latest_message.intent.get(INTENT_NAME_KEY)
+        if tracker.latest_message:
+            intent = tracker.latest_message.intent.get(INTENT_NAME_KEY)
+        else:
+            intent = None

        intent_not_blocked = not mapping_intents and intent not in mapping_not_intents

@@ -145,7 +148,10 @@ def entity_is_desired(
            True, if slot should be filled, false otherwise.
        """
        slot_fulfils_entity_mapping = False
-        extracted_entities = tracker.latest_message.entities
+        if tracker.latest_message:
+            extracted_entities = tracker.latest_message.entities
+        else:
+            extracted_entities = []

        for entity in extracted_entities:
            if (
diff --git a/rasa/shared/core/slots.py b/rasa/shared/core/slots.py
index bb3496b3aaca..495add19062a 100644
--- a/rasa/shared/core/slots.py
+++ b/rasa/shared/core/slots.py
@@ -282,7 +282,8 @@ def _as_feature(self) -> List[float]:
             # we couldn't convert the value to a list - using default value
             return [0.0]

-    @Slot.value.setter
+    # FIXME: https://github.com/python/mypy/issues/8085
+    @Slot.value.setter  # type: ignore[attr-defined]
     def value(self, value: Any) -> None:
         """Sets the slot's value."""
         if value and not isinstance(value, list):
@@ -290,7 +291,8 @@ def value(self, value: Any) -> None:
             value = [value]

         # Call property setter of superclass
-        super(ListSlot, self.__class__).value.fset(self, value)
+        # FIXME: https://github.com/python/mypy/issues/8085
+        super(ListSlot, self.__class__).value.fset(self, value)  # type: ignore[attr-defined] # noqa: E501


 class CategoricalSlot(Slot):
diff --git a/rasa/shared/core/trackers.py b/rasa/shared/core/trackers.py
index f9982fb6505b..dbe1efa4c3d3 100644
--- a/rasa/shared/core/trackers.py
+++ b/rasa/shared/core/trackers.py
@@ -12,6 +12,7 @@
     Iterator,
     Generator,
     Type,
+    TypeVar,
     List,
     Deque,
     Iterable,
@@ -82,6 +83,8 @@
         total=False,
     )

+    EventTypeAlias = TypeVar("EventTypeAlias", bound=Event)
+

 logger = logging.getLogger(__name__)

@@ -401,6 +404,8 @@ def get_latest_entity_values(
         Returns:
             Entity values.
""" + if self.latest_message is None: + return iter([]) return ( x.get(ENTITY_ATTRIBUTE_VALUE) @@ -479,7 +484,7 @@ def applied_events(self) -> List[Event]: if isinstance(event, ActiveLoop) and event.name ] - applied_events = [] + applied_events: List[Event] = [] for event in self.events: if isinstance(event, (Restarted, SessionStarted)): @@ -717,11 +722,11 @@ def export_stories_to_file(self, export_path: Text = "debug_stories.yml") -> Non def get_last_event_for( self, - event_type: Union[Type[Event], Tuple[Type, ...]], + event_type: Union[Type["EventTypeAlias"], Tuple[Type["EventTypeAlias"], ...]], action_names_to_exclude: List[Text] = None, skip: int = 0, event_verbosity: EventVerbosity = EventVerbosity.APPLIED, - ) -> Optional[Event]: + ) -> Optional["EventTypeAlias"]: """Gets the last event of a given type which was actually applied. Args: @@ -735,7 +740,6 @@ def get_last_event_for( Returns: event which matched the query or `None` if no event matched. """ - to_exclude = action_names_to_exclude or [] def filter_function(e: Event) -> bool: @@ -843,6 +847,9 @@ def latest_action_name(self) -> Optional[Text]: Returns: name of the previously executed action or text of e2e action """ + if self.latest_action is None: + return None + return self.latest_action.get(ACTION_NAME) or self.latest_action.get( ACTION_TEXT ) diff --git a/rasa/shared/core/training_data/story_reader/story_reader.py b/rasa/shared/core/training_data/story_reader/story_reader.py index 6d2a32785f49..80ffbc976637 100644 --- a/rasa/shared/core/training_data/story_reader/story_reader.py +++ b/rasa/shared/core/training_data/story_reader/story_reader.py @@ -25,7 +25,7 @@ def __init__( domain: Domain object. source_name: Name of the training data source. """ - self.story_steps = [] + self.story_steps: List[StoryStep] = [] self.current_step_builder: Optional[StoryStepBuilder] = None self.domain = domain self.source_name = source_name @@ -91,6 +91,8 @@ def _parse_events( def _add_event(self, event_name: Text, parameters: Dict[Text, Any]) -> None: parsed_events = self._parse_events(event_name, parameters) + if parsed_events is None: + parsed_events = [] if self.current_step_builder is None: raise StoryParseError( diff --git a/rasa/shared/core/training_data/story_reader/story_step_builder.py b/rasa/shared/core/training_data/story_reader/story_step_builder.py index a156d2b335cf..91b80d37b10a 100644 --- a/rasa/shared/core/training_data/story_reader/story_step_builder.py +++ b/rasa/shared/core/training_data/story_reader/story_step_builder.py @@ -23,13 +23,13 @@ def __init__( ) -> None: self.name = name self.source_name = source_name - self.story_steps = [] - self.current_steps = [] - self.start_checkpoints = [] + self.story_steps: List[StoryStep] = [] + self.current_steps: List[StoryStep] = [] + self.start_checkpoints: List[Checkpoint] = [] self.is_rule = is_rule def add_checkpoint(self, name: Text, conditions: Optional[Dict[Text, Any]]) -> None: - + """Add a checkpoint to story steps.""" # Depending on the state of the story part this # is either a start or an end check point if not self.current_steps: diff --git a/rasa/shared/core/training_data/story_reader/yaml_story_reader.py b/rasa/shared/core/training_data/story_reader/yaml_story_reader.py index 4eaeaef6bb52..aaba8ef17b4f 100644 --- a/rasa/shared/core/training_data/story_reader/yaml_story_reader.py +++ b/rasa/shared/core/training_data/story_reader/yaml_story_reader.py @@ -321,7 +321,8 @@ def _parse_user_utterance(self, step: Dict[Text, Any]) -> None: else: 
self._validate_that_utterance_is_in_domain(utterance) - self.current_step_builder.add_user_messages([utterance]) + if self.current_step_builder is not None: + self.current_step_builder.add_user_messages([utterance]) def _validate_that_utterance_is_in_domain(self, utterance: UserUttered) -> None: intent_name = utterance.intent.get(INTENT_NAME_KEY) @@ -347,7 +348,7 @@ def _validate_that_utterance_is_in_domain(self, utterance: UserUttered) -> None: def _parse_or_statement(self, step: Dict[Text, Any]) -> None: events: List = [] - for item in step.get(KEY_OR): + for item in step.get(KEY_OR, []): if KEY_USER_INTENT in item.keys(): utterance = self._parse_raw_user_utterance(item) if utterance: @@ -385,7 +386,7 @@ def _parse_or_statement(self, step: Dict[Text, Any]) -> None: ) return - if events: + if events and self.current_step_builder is not None: self.current_step_builder.add_events(events) def _user_intent_from_step( diff --git a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py index 24645c0a7364..80d3aee8d37c 100644 --- a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py +++ b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py @@ -213,9 +213,12 @@ def process_user_utterance( for entity in user_utterance.entities: if "value" in entity: if hasattr(user_utterance, "inline_comment_for_entity"): - for predicted in user_utterance.predicted_entities: + # FIXME: to fix this type issue, WronglyClassifiedUserUtterance + # needs to be imported but it's currently outside + # of `rasa.shared` + for predicted in user_utterance.predicted_entities: # type: ignore[attr-defined] # noqa: E501 if predicted["start"] == entity["start"]: - commented_entity = user_utterance.inline_comment_for_entity( # noqa: E501 + commented_entity = user_utterance.inline_comment_for_entity( # type: ignore[attr-defined] # noqa: E501 predicted, entity ) if commented_entity: @@ -241,7 +244,9 @@ def process_user_utterance( result[KEY_ENTITIES] = entities if hasattr(user_utterance, "inline_comment"): - comment = user_utterance.inline_comment( + # FIXME: to fix this type issue, WronglyClassifiedUserUtterance needs to + # be imported but it's currently outside of `rasa.shared` + comment = user_utterance.inline_comment( # type: ignore[attr-defined] force_comment_generation=not entities ) if comment: @@ -283,7 +288,9 @@ def process_action(action: ActionExecuted) -> Optional[OrderedDict]: result[KEY_BOT_END_TO_END_MESSAGE] = action.action_text if hasattr(action, "inline_comment"): - comment = action.inline_comment() + # FIXME: to fix this type issue, WarningPredictedAction needs to + # be imported but it's currently outside of `rasa.shared` + comment = action.inline_comment() # type: ignore[attr-defined] if KEY_ACTION in result and comment: result.yaml_add_eol_comment(comment, KEY_ACTION) elif KEY_BOT_END_TO_END_MESSAGE in result and comment: @@ -395,11 +402,13 @@ def process_rule_step(self, rule_step: RuleStep) -> OrderedDict: if normal_steps: result[KEY_STEPS] = normal_steps - if len(normal_events) > 1 and ( - isinstance(normal_events[len(normal_events) - 1], ActionExecuted) - and normal_events[len(normal_events) - 1].action_name - == rasa.shared.core.constants.RULE_SNIPPET_ACTION_NAME - ): - result[KEY_WAIT_FOR_USER_INPUT_AFTER_RULE] = False + if len(normal_events) > 1: + last_event = normal_events[len(normal_events) - 1] + if ( + isinstance(last_event, ActionExecuted) + and last_event.action_name + == 
+            ):
+                result[KEY_WAIT_FOR_USER_INPUT_AFTER_RULE] = False

         return result

diff --git a/rasa/shared/core/training_data/structures.py b/rasa/shared/core/training_data/structures.py
index 685b6f024d52..1f31c93506f4 100644
--- a/rasa/shared/core/training_data/structures.py
+++ b/rasa/shared/core/training_data/structures.py
@@ -7,6 +7,7 @@
 from typing import (
     List,
     Text,
+    Deque,
     Dict,
     Optional,
     Tuple,
@@ -598,7 +599,7 @@ def _remove_unused_generated_cps(
         unused_genr_cps = {
             cp_name
             for cp_name in unused_cps
-            if cp_name.startswith(GENERATED_CHECKPOINT_PREFIX)
+            if cp_name is not None and cp_name.startswith(GENERATED_CHECKPOINT_PREFIX)
         }

         k_to_remove = set()
@@ -716,7 +717,7 @@ def topological_sort(
     # noinspection PyPep8Naming
     GRAY, BLACK = 0, 1

-    ordered = deque()
+    ordered: Deque = deque()
     unprocessed = sorted(set(graph))
     visited_nodes = {}

diff --git a/rasa/shared/core/training_data/visualization.py b/rasa/shared/core/training_data/visualization.py
index a237ac2f63a4..6ee507e623e5 100644
--- a/rasa/shared/core/training_data/visualization.py
+++ b/rasa/shared/core/training_data/visualization.py
@@ -1,7 +1,18 @@
 from collections import defaultdict, deque
 import random
-from typing import Any, Text, List, Dict, Optional, TYPE_CHECKING, Set
+from typing import (
+    Any,
+    Text,
+    List,
+    Deque,
+    Dict,
+    Optional,
+    Set,
+    TYPE_CHECKING,
+    Union,
+    cast,
+)

 import rasa.shared.utils.io
 from rasa.shared.constants import INTENT_MESSAGE_PREFIX
@@ -88,7 +99,7 @@ def _fingerprint_node(
     # the candidate list contains all node paths that haven't been
     # extended till `max_history` length yet.
-    candidates = deque()
+    candidates: Deque = deque()
     candidates.append([node])
     continuations = []
     while len(candidates) > 0:
@@ -324,20 +335,27 @@ def persist_graph(graph: "networkx.Graph", output_file: Text) -> None:

 def _length_of_common_action_prefix(this: List[Event], other: List[Event]) -> int:
     """Calculate number of actions that two conversations have in common."""
     num_common_actions = 0
-    t_cleaned = [e for e in this if e.type_name in {"user", "action"}]
-    o_cleaned = [e for e in other if e.type_name in {"user", "action"}]
+    t_cleaned = cast(
+        List[Union[ActionExecuted, UserUttered]],
+        [e for e in this if e.type_name in {"user", "action"}],
+    )
+    o_cleaned = cast(
+        List[Union[ActionExecuted, UserUttered]],
+        [e for e in other if e.type_name in {"user", "action"}],
+    )

     for i, e in enumerate(t_cleaned):
         if i == len(o_cleaned):
             break
-        elif isinstance(e, UserUttered) and isinstance(o_cleaned[i], UserUttered):
+        o = o_cleaned[i]
+        if isinstance(e, UserUttered) and isinstance(o, UserUttered):
             continue
         elif (
             isinstance(e, ActionExecuted)
-            and isinstance(o_cleaned[i], ActionExecuted)
-            and o_cleaned[i].action_name == e.action_name
+            and isinstance(o, ActionExecuted)
+            and o.action_name == e.action_name
         ):
             num_common_actions += 1
         else:
@@ -462,16 +479,20 @@ def visualize_neighborhood(
             # this can either be an ellipsis "...", the conversation end node
             # "END" or a "TMP" node if this is the active conversation
             if is_current:
+                event_idx = events[idx]
                 if (
-                    isinstance(events[idx], ActionExecuted)
-                    and events[idx].action_name == ACTION_LISTEN_NAME
+                    isinstance(event_idx, ActionExecuted)
+                    and event_idx.action_name == ACTION_LISTEN_NAME
                 ):
                     next_node_idx += 1
+                    if message is None:
+                        label = " ? "
+                    else:
+                        intent = cast(dict, message).get("intent", {})
+                        label = intent.get("name", " ? ")
                     graph.add_node(
                         next_node_idx,
-                        label=" ? "
" - if not message - else message.get("intent", {}).get("name", " ? "), + label=label, shape="rect", **{"class": "intent dashed active"}, ) diff --git a/rasa/shared/exceptions.py b/rasa/shared/exceptions.py index 19b983b940ee..d5ba847054bb 100644 --- a/rasa/shared/exceptions.py +++ b/rasa/shared/exceptions.py @@ -2,6 +2,11 @@ from typing import Optional, Text import jsonschema +from ruamel.yaml.error import ( + MarkedYAMLError, + MarkedYAMLWarning, + MarkedYAMLFutureWarning, +) class RasaException(Exception): @@ -54,8 +59,16 @@ def __str__(self) -> Text: exception_text = "Failed to read YAML." if self.underlying_yaml_exception: - self.underlying_yaml_exception.warn = None - self.underlying_yaml_exception.note = None + if isinstance( + self.underlying_yaml_exception, + (MarkedYAMLError, MarkedYAMLWarning, MarkedYAMLFutureWarning), + ): + self.underlying_yaml_exception.note = None + if isinstance( + self.underlying_yaml_exception, + (MarkedYAMLWarning, MarkedYAMLFutureWarning), + ): + self.underlying_yaml_exception.warn = None exception_text += f" {self.underlying_yaml_exception}" if self.filename: diff --git a/rasa/shared/importers/importer.py b/rasa/shared/importers/importer.py index dabb9624eb6f..247dd89de2ec 100644 --- a/rasa/shared/importers/importer.py +++ b/rasa/shared/importers/importer.py @@ -6,7 +6,13 @@ import rasa.shared.utils.common import rasa.shared.core.constants import rasa.shared.utils.io -from rasa.shared.core.domain import Domain +from rasa.shared.core.domain import ( + Domain, + KEY_E2E_ACTIONS, + KEY_INTENTS, + KEY_RESPONSES, + KEY_ACTIONS, +) from rasa.shared.core.events import ActionExecuted, UserUttered from rasa.shared.core.training_data.structures import StoryGraph from rasa.shared.nlu.training_data.message import Message @@ -55,10 +61,9 @@ def get_config(self) -> Dict: """ raise NotImplementedError() - @rasa.shared.utils.common.cached_method def get_config_file_for_auto_config(self) -> Optional[Text]: """Returns config file path for auto-config only if there is a single one.""" - return self.config_file + raise NotImplementedError() def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData: """Retrieves the NLU training data that should be used for training. 
@@ -173,8 +178,7 @@ def _importer_from_dict( importer_config, importer_class ) - # mypy ignore needed because RasaFileImporter and MultiFI have different args - return importer_class( # type: ignore[call-arg] + return importer_class( config_path, domain_path, training_data_paths, **constructor_arguments ) @@ -243,7 +247,9 @@ def get_domain(self) -> Domain: domains = [importer.get_domain() for importer in self._importers] return reduce( - lambda merged, other: merged.merge(other), domains, Domain.empty() + lambda merged, other: merged.merge(other), + domains, + Domain.empty(), ) @rasa.shared.utils.common.cached_method @@ -324,7 +330,9 @@ def get_domain(self) -> Domain: existing_domain, ) - existing_domain = existing_domain.merge(domain_with_retrieval_intents) + existing_domain = existing_domain.merge( + domain_with_retrieval_intents, override=True + ) existing_domain.check_missing_responses() return existing_domain @@ -376,13 +384,16 @@ def _get_domain_with_retrieval_intents( intent_properties[IS_RETRIEVAL_INTENT_KEY] = True retrieval_intent_properties.append({intent: intent_properties}) - return Domain( - retrieval_intent_properties, - [], - [], - responses, - ResponsesSyncImporter._construct_retrieval_action_names(retrieval_intents), - {}, + action_names = ResponsesSyncImporter._construct_retrieval_action_names( + retrieval_intents + ) + + return Domain.from_dict( + { + KEY_INTENTS: retrieval_intent_properties, + KEY_RESPONSES: responses, + KEY_ACTIONS: action_names, + } ) def get_stories(self, exclusion_percentage: Optional[int] = None) -> StoryGraph: @@ -437,6 +448,7 @@ def get_domain(self) -> Domain: """Retrieves model domain (see parent class for full docstring).""" original = self.importer.get_domain() e2e_domain = self._get_domain_with_e2e_actions() + return original.merge(e2e_domain) def _get_domain_with_e2e_actions(self) -> Domain: @@ -455,15 +467,7 @@ def _get_domain_with_e2e_actions(self) -> Domain: additional_e2e_action_names = list(additional_e2e_action_names) - return Domain( - [], - [], - [], - {}, - action_names=[], - forms={}, - action_texts=additional_e2e_action_names, - ) + return Domain.from_dict({KEY_E2E_ACTIONS: additional_e2e_action_names}) def get_stories(self, exclusion_percentage: Optional[int] = None) -> StoryGraph: """Retrieves the stories that should be used for training. 
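
Note: the `ResponsesSyncImporter` and `E2EImporter` hunks above build partial domains via `Domain.from_dict(...)` instead of passing positional arguments to `Domain(...)`, so call sites stay stable as the constructor signature grows (e.g. the new `data` argument seen in the test diffs later in this patch). A minimal sketch of the pattern, assuming a Rasa 3.x environment; the `KEY_*` constants and the `override` flag come from the diff, while the intent, response, and action values are made-up examples:

from rasa.shared.core.domain import (
    Domain,
    KEY_ACTIONS,
    KEY_INTENTS,
    KEY_RESPONSES,
)

# Only the sections of interest are supplied; every omitted section
# (entities, slots, forms, ...) falls back to an empty default.
retrieval_domain = Domain.from_dict(
    {
        KEY_INTENTS: ["chitchat"],  # hypothetical retrieval intent
        KEY_RESPONSES: {"utter_chitchat/ask_name": [{"text": "I am a bot."}]},
        KEY_ACTIONS: ["utter_chitchat"],
    }
)

# With `override=True`, entries from the right-hand domain win when both
# domains define the same key, mirroring `ResponsesSyncImporter.get_domain`.
merged = Domain.empty().merge(retrieval_domain, override=True)
assert "chitchat" in merged.intents
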
diff --git a/rasa/shared/importers/multi_project.py b/rasa/shared/importers/multi_project.py
index 79078066b6af..7b5f443dfb86 100644
--- a/rasa/shared/importers/multi_project.py
+++ b/rasa/shared/importers/multi_project.py
@@ -32,9 +32,9 @@ def __init__(
         else:
             self._domain_paths = []
         self._story_paths = []
-        self._e2e_story_paths = []
+        self._e2e_story_paths: List[Text] = []
         self._nlu_paths = []
-        self._imports = []
+        self._imports: List[Text] = []
         self._additional_paths = training_data_paths or []
         self._project_directory = project_directory or os.path.dirname(config_file)

@@ -179,7 +179,9 @@ def get_domain(self) -> Domain:
         """Retrieves model domain (see parent class for full docstring)."""
         domains = [Domain.load(path) for path in self._domain_paths]
         return reduce(
-            lambda merged, other: merged.merge(other), domains, Domain.empty()
+            lambda merged, other: merged.merge(other),
+            domains,
+            Domain.empty(),
         )

     def get_stories(self, exclusion_percentage: Optional[int] = None) -> StoryGraph:
diff --git a/rasa/shared/importers/rasa.py b/rasa/shared/importers/rasa.py
index 2426cfb9cd89..53ba35b5ee80 100644
--- a/rasa/shared/importers/rasa.py
+++ b/rasa/shared/importers/rasa.py
@@ -3,7 +3,8 @@
 from typing import Dict, List, Optional, Text, Union

 import rasa.shared.data
-
+import rasa.shared.utils.common
+import rasa.shared.utils.io
 from rasa.shared.core.training_data.structures import StoryGraph
 from rasa.shared.importers import utils
 from rasa.shared.importers.importer import TrainingDataImporter
@@ -12,7 +13,6 @@
 from rasa.shared.core.training_data.story_reader.yaml_story_reader import (
     YAMLStoryReader,
 )
-import rasa.shared.utils.io

 logger = logging.getLogger(__name__)

@@ -50,6 +50,11 @@ def get_config(self) -> Dict:
         config = rasa.shared.utils.io.read_model_configuration(self.config_file)
         return config

+    @rasa.shared.utils.common.cached_method
+    def get_config_file_for_auto_config(self) -> Optional[Text]:
+        """Returns config file path for auto-config only if there is a single one."""
+        return self.config_file
+
     def get_stories(self, exclusion_percentage: Optional[int] = None) -> StoryGraph:
         """Retrieves training stories / rules (see parent class for full docstring)."""
         return utils.story_graph_from_paths(
diff --git a/rasa/shared/nlu/training_data/formats/rasa_yaml.py b/rasa/shared/nlu/training_data/formats/rasa_yaml.py
index adaf46d43c2f..9bfc060c5ced 100644
--- a/rasa/shared/nlu/training_data/formats/rasa_yaml.py
+++ b/rasa/shared/nlu/training_data/formats/rasa_yaml.py
@@ -429,7 +429,8 @@ def process_intents(cls, training_data: "TrainingData") -> List[OrderedDict]:

     @classmethod
     def process_synonyms(cls, training_data: "TrainingData") -> List[OrderedDict]:
-        inverted_synonyms = OrderedDict()
+        """Serializes the synonyms."""
+        inverted_synonyms: Dict[Text, List[Dict]] = OrderedDict()
         for example, synonym in training_data.entity_synonyms.items():
             if not inverted_synonyms.get(synonym):
                 inverted_synonyms[synonym] = []
@@ -444,7 +445,8 @@ def process_synonyms(cls, training_data: "TrainingData") -> List[OrderedDict]:

     @classmethod
     def process_regexes(cls, training_data: "TrainingData") -> List[OrderedDict]:
-        inverted_regexes = OrderedDict()
+        """Serializes the regexes."""
+        inverted_regexes: Dict[Text, List[Text]] = OrderedDict()
         for regex in training_data.regex_features:
             if not inverted_regexes.get(regex["name"]):
                 inverted_regexes[regex["name"]] = []
diff --git a/rasa/shared/nlu/training_data/formats/readerwriter.py b/rasa/shared/nlu/training_data/formats/readerwriter.py
index b9fde20830ce..50475f2b1f86 100644
--- a/rasa/shared/nlu/training_data/formats/readerwriter.py
+++ b/rasa/shared/nlu/training_data/formats/readerwriter.py
@@ -77,12 +77,14 @@ def dumps(self, training_data: "TrainingData") -> Text:
         raise NotImplementedError

     @staticmethod
-    def prepare_training_examples(training_data: "TrainingData") -> OrderedDict:
+    def prepare_training_examples(
+        training_data: "TrainingData",
+    ) -> Dict[Text, List[Union[Dict, Text]]]:
         """Pre-processes training data examples by removing not trainable entities."""
         import rasa.shared.nlu.training_data.util as rasa_nlu_training_data_utils

-        training_examples = OrderedDict()
+        training_examples: Dict[Text, List[Union[Dict, Text]]] = OrderedDict()

         # Sort by intent while keeping basic intent order
         for example in [e.as_dict_nlu() for e in training_data.training_examples]:
diff --git a/rasa/shared/nlu/training_data/loading.py b/rasa/shared/nlu/training_data/loading.py
index 335f023d429a..163303fa5cf4 100644
--- a/rasa/shared/nlu/training_data/loading.py
+++ b/rasa/shared/nlu/training_data/loading.py
@@ -2,7 +2,7 @@
 import logging
 import os
 import typing
-from typing import Optional, Text, Callable, Dict, Any
+from typing import Optional, Text, Callable, Dict, Any, List

 import rasa.shared.utils.io
 from rasa.shared.nlu.training_data.formats.dialogflow import (
@@ -54,13 +54,13 @@ def load_data(resource_name: Text, language: Optional[Text] = "en") -> "Training

     files = rasa.shared.utils.io.list_files(resource_name)
     data_sets = [_load(f, language) for f in files]
-    data_sets = [ds for ds in data_sets if ds]
-    if len(data_sets) == 0:
+    training_data_sets: List[TrainingData] = [ds for ds in data_sets if ds]
+    if len(training_data_sets) == 0:
         training_data = TrainingData()
-    elif len(data_sets) == 1:
-        training_data = data_sets[0]
+    elif len(training_data_sets) == 1:
+        training_data = training_data_sets[0]
     else:
-        training_data = data_sets[0].merge(*data_sets[1:])
+        training_data = training_data_sets[0].merge(*training_data_sets[1:])

     return training_data

diff --git a/rasa/shared/utils/common.py b/rasa/shared/utils/common.py
index aafda03e92eb..ed690b442362 100644
--- a/rasa/shared/utils/common.py
+++ b/rasa/shared/utils/common.py
@@ -201,3 +201,72 @@ def arguments_of(func: Callable) -> List[Text]:
     import inspect

     return list(inspect.signature(func).parameters.keys())
+
+
+def extract_duplicates(list1: List[Any], list2: List[Any]) -> List[Any]:
+    """Extracts duplicates from two lists."""
+    if list1:
+        dict1 = {
+            (sorted(list(i.keys()))[0] if isinstance(i, dict) else i): i for i in list1
+        }
+    else:
+        dict1 = {}
+
+    if list2:
+        dict2 = {
+            (sorted(list(i.keys()))[0] if isinstance(i, dict) else i): i for i in list2
+        }
+    else:
+        dict2 = {}
+
+    set1 = set(dict1.keys())
+    set2 = set(dict2.keys())
+    dupes = set1.intersection(set2)
+    return sorted(list(dupes))
+
+
+def clean_duplicates(dupes: Dict[Text, Any]) -> Dict[Text, Any]:
+    """Removes keys for empty values."""
+    duplicates = dupes.copy()
+    for k in dupes:
+        if not dupes[k]:
+            duplicates.pop(k)
+
+    return duplicates
+
+
+def merge_dicts(
+    dict1: Dict[Text, Any],
+    dict2: Dict[Text, Any],
+    override_existing_values: bool = False,
+) -> Dict[Text, Any]:
+    """Merges two dicts."""
+    if override_existing_values:
+        merged_dicts, b = dict1.copy(), dict2.copy()
+    else:
+        merged_dicts, b = dict2.copy(), dict1.copy()
+    merged_dicts.update(b)
+    return merged_dicts
+
+
+def merge_lists(
+    list1: List[Any], list2: List[Any], override: bool = False
+) -> List[Any]:
+    """Merges two lists."""
lists.""" + return sorted(list(set(list1 + list2))) + + +def merge_lists_of_dicts( + dict_list1: List[Dict], + dict_list2: List[Dict], + override_existing_values: bool = False, +) -> List[Dict]: + """Merges two dict lists.""" + dict1 = { + (sorted(list(i.keys()))[0] if isinstance(i, dict) else i): i for i in dict_list1 + } + dict2 = { + (sorted(list(i.keys()))[0] if isinstance(i, dict) else i): i for i in dict_list2 + } + merged_dicts = merge_dicts(dict1, dict2, override_existing_values) + return list(merged_dicts.values()) diff --git a/rasa/shared/utils/io.py b/rasa/shared/utils/io.py index 05216751d06e..122fd256f31a 100644 --- a/rasa/shared/utils/io.py +++ b/rasa/shared/utils/io.py @@ -164,7 +164,7 @@ def list_directory(path: Text) -> List[Text]: if os.path.isfile(path): return [path] elif os.path.isdir(path): - results = [] + results: List[Text] = [] for base, dirs, files in os.walk(path, followlinks=True): # sort files for same order across runs files = sorted(files, key=_filename_without_prefix) @@ -302,7 +302,6 @@ def construct_yaml_str(self: BaseConstructor, node: ScalarNode) -> Any: yaml.Loader.add_constructor("tag:yaml.org,2002:str", construct_yaml_str) yaml.SafeLoader.add_constructor("tag:yaml.org,2002:str", construct_yaml_str) - yaml.allow_duplicate_keys = False def replace_environment_variables() -> None: diff --git a/rasa/telemetry.py b/rasa/telemetry.py index 107d0de0f03c..592f0ec10512 100644 --- a/rasa/telemetry.py +++ b/rasa/telemetry.py @@ -986,10 +986,15 @@ def track_core_model_test(num_story_steps: int, e2e: bool, agent: "Agent") -> No e2e: indicator if tests running in end to end mode agent: Agent of the model getting tested """ + if agent.processor is None: + project_fingerprint = "" + else: + project_fingerprint = agent.processor.model_metadata.project_fingerprint + _track( TELEMETRY_TEST_CORE_EVENT, { - "project": agent.processor.model_metadata.project_fingerprint, + "project": project_fingerprint, "end_to_end": e2e, "num_story_steps": num_story_steps, }, diff --git a/rasa/utils/common.py b/rasa/utils/common.py index 9397239ba81f..9a8c42f9f018 100644 --- a/rasa/utils/common.py +++ b/rasa/utils/common.py @@ -1,6 +1,7 @@ import asyncio import copy import logging +import logging.handlers import os import shutil import warnings diff --git a/rasa/utils/tensorflow/data_generator.py b/rasa/utils/tensorflow/data_generator.py index d4d4476858dc..c9a9f8d235f1 100644 --- a/rasa/utils/tensorflow/data_generator.py +++ b/rasa/utils/tensorflow/data_generator.py @@ -367,7 +367,7 @@ def __init__( # actual batch size will be set inside `on_epoch_end` self._current_batch_size = 0 # create separate data variable that will store modified data for each batch - self._data = {} + self._data: Data = {} self.on_epoch_end() def __len__(self) -> int: diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py index 0e0a53d15513..c1740910a5dc 100644 --- a/rasa/utils/tensorflow/model_data.py +++ b/rasa/utils/tensorflow/model_data.py @@ -254,7 +254,7 @@ def __init__( self.label_sub_key = label_sub_key # should be updated when features are added self.num_examples = self.number_of_examples() - self.sparse_feature_sizes = {} + self.sparse_feature_sizes: Dict[Text, Dict[Text, List[int]]] = {} def get( self, key: Text, sub_key: Optional[Text] = None @@ -321,7 +321,7 @@ def first_data_example(self) -> Data: Returns: The simplified data. 
""" - out_data = {} + out_data: Data = {} for key, attribute_data in self.data.items(): out_data[key] = {} for sub_key, features in attribute_data.items(): @@ -417,7 +417,7 @@ def number_of_units(self, key: Text, sub_key: Text) -> int: units = 0 for features in self.data[key][sub_key]: if len(features) > 0: - units += features.units + units += features.units # type: ignore[operator] return units @@ -569,7 +569,7 @@ def split( for data in attribute_data.values() for v in data ] - solo_values = [ + solo_values: List[Any] = [ [] for attribute_data in self.data.values() for data in attribute_data.values() @@ -695,7 +695,9 @@ def balanced_data(self, data: Data, batch_size: int, shuffle: bool) -> Data: # if a label was skipped in current batch skipped = [False] * num_label_ids - new_data = defaultdict(lambda: defaultdict(list)) + new_data: defaultdict[ + Text, defaultdict[Text, List[List[FeatureArray]]] + ] = defaultdict(lambda: defaultdict(list)) while min(num_data_cycles) == 0: if shuffle: @@ -846,8 +848,12 @@ def _convert_train_test_split( Returns: The test and train RasaModelData """ - data_train = defaultdict(lambda: defaultdict(list)) - data_val = defaultdict(lambda: defaultdict(list)) + data_train: defaultdict[ + Text, defaultdict[Text, List[FeatureArray]] + ] = defaultdict(lambda: defaultdict(list)) + data_val: defaultdict[Text, defaultdict[Text, List[Any]]] = defaultdict( + lambda: defaultdict(list) + ) # output_values = x_train, x_val, y_train, y_val, z_train, z_val, etc. # order is kept, e.g. same order as model data keys diff --git a/rasa/utils/tensorflow/model_data_utils.py b/rasa/utils/tensorflow/model_data_utils.py index f2c4a96d451e..ab738af10fb8 100644 --- a/rasa/utils/tensorflow/model_data_utils.py +++ b/rasa/utils/tensorflow/model_data_utils.py @@ -50,9 +50,11 @@ def featurize_training_examples( A dictionary of attribute to feature sizes. 
""" output = [] + if not entity_tag_specs: + entity_tag_specs = [] for example in training_examples: - attribute_to_features = {} + attribute_to_features: Dict[Text, List["Features"]] = {} for attribute in attributes: if attribute == ENTITIES: attribute_to_features[attribute] = [] @@ -458,25 +460,25 @@ def _extract_features( attribute_mask[i] = 0 list_of_features = fake_features - for features in list_of_features: + for feature in list_of_features: # in case of ENTITIES, if the attribute type matches either 'entity', # 'role', or 'group' the features correspond to the tag ids of that # entity type in order to distinguish later on between the different # tag ids, we use the entity type as key - if attribute == ENTITIES and features.attribute in [ + if attribute == ENTITIES and feature.attribute in [ ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_GROUP, ENTITY_ATTRIBUTE_ROLE, ]: - key = features.attribute + key = feature.attribute else: - key = features.type + key = feature.type # all features should have the same types - if features.is_sparse(): - dialogue_sparse_features[key].append(features.features) + if feature.is_sparse(): + dialogue_sparse_features[key].append(feature.features) else: - dialogue_dense_features[key].append(features.features) + dialogue_dense_features[key].append(feature.features) for key, value in dialogue_sparse_features.items(): sparse_features[key].append(value) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 43856112d3b5..6a09bd343ca3 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -295,7 +295,7 @@ def run_inference( Returns: Model outputs corresponding to the inputs fed. """ - outputs = {} + outputs: Dict[Text, Union[np.ndarray, Dict[Text, Any]]] = {} (data_generator, _) = rasa.utils.train_utils.create_data_generators( model_data=model_data, batch_sizes=batch_size, epochs=1, shuffle=False ) diff --git a/rasa/utils/tensorflow/rasa_layers.py b/rasa/utils/tensorflow/rasa_layers.py index 8b68eceaa167..cea20adbf115 100644 --- a/rasa/utils/tensorflow/rasa_layers.py +++ b/rasa/utils/tensorflow/rasa_layers.py @@ -127,8 +127,7 @@ def _replace_dense_for_sparse_layer( """ kernel = layer_to_replace.get_kernel().numpy() bias = layer_to_replace.get_bias() - use_bias = False if bias is None else True - if use_bias: + if bias is not None: bias = bias.numpy() units = layer_to_replace.get_units() # split kernel by feature sizes to update the layer accordingly @@ -155,12 +154,12 @@ def _replace_dense_for_sparse_layer( # stack each merged weight to form a new weight tensor new_weights = np.vstack(merged_weights) kernel_init = tf.constant_initializer(new_weights) - bias_init = tf.constant_initializer(bias) if use_bias else None + bias_init = tf.constant_initializer(bias) if bias is not None else None new_layer = layers.DenseForSparse( name=f"sparse_to_dense.{attribute}_{feature_type}", reg_lambda=reg_lambda, units=units, - use_bias=use_bias, + use_bias=bias is not None, kernel_initializer=kernel_init, bias_initializer=bias_init, ) @@ -233,7 +232,7 @@ def __init__( ) # Prepare dropout and sparse-to-dense layers if any sparse tensors are expected - self._tf_layers = {} + self._tf_layers: Dict[Text, tf.keras.layers.Layer] = {} if any([signature.is_sparse for signature in feature_type_signature]): self._prepare_layers_for_sparse_tensors(attribute, feature_type, config) @@ -404,7 +403,7 @@ def __init__( super().__init__(name=f"rasa_feature_combining_layer_{attribute}") - self._tf_layers = {} + self._tf_layers: Dict[Text, 
tf.keras.layers.Layer] = {} # Prepare sparse-dense combining layers for each present feature type self._feature_types_present = self._get_present_feature_types( diff --git a/rasa/utils/tensorflow/temp_keras_modules.py b/rasa/utils/tensorflow/temp_keras_modules.py index 57a605c38552..cb9c40b05176 100644 --- a/rasa/utils/tensorflow/temp_keras_modules.py +++ b/rasa/utils/tensorflow/temp_keras_modules.py @@ -1,5 +1,5 @@ import copy -from typing import List, Dict, Union, Optional, Any, Generator, Tuple, Iterator +from typing import List, Dict, Union, Optional, Any, Generator, Tuple, Iterator, cast import numpy as np @@ -358,11 +358,12 @@ def fit( epochs=epochs, steps=data_handler.inferred_steps, ) + callbacks_list = cast(callbacks_module.CallbackList, callbacks) self.stop_training = False self.train_function = self.make_train_function() self._train_counter.assign(0) - callbacks.on_train_begin() + callbacks_list.on_train_begin() training_logs = None # Handle fault-tolerance for multi-worker. # TODO(omalleyt): Fix the ordering issues that mean this has to @@ -373,7 +374,7 @@ def fit( logs = None for epoch, iterator in data_handler.enumerate_epochs(): self.reset_metrics() - callbacks.on_epoch_begin(epoch) + callbacks_list.on_epoch_begin(epoch) with data_handler.catch_stop_iteration(): for step in data_handler.steps(): with tf.profiler.experimental.Trace( @@ -383,13 +384,13 @@ def fit( batch_size=batch_size, _r=1, ): - callbacks.on_train_batch_begin(step) + callbacks_list.on_train_batch_begin(step) tmp_logs = self.train_function(iterator) if data_handler.should_sync: context.async_wait() logs = tmp_logs # No error, now safe to assign to logs. end_step = step + data_handler.step_increment - callbacks.on_train_batch_end(end_step, logs) + callbacks_list.on_train_batch_end(end_step, logs) if self.stop_training: break @@ -429,7 +430,7 @@ def fit( sample_weight=val_sample_weight, batch_size=validation_batch_size or batch_size, steps=validation_steps, - callbacks=callbacks, + callbacks=callbacks_list, max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing, @@ -439,7 +440,7 @@ def fit( val_logs = {"val_" + name: val for name, val in val_logs.items()} epoch_logs.update(val_logs) - callbacks.on_epoch_end(epoch, epoch_logs) + callbacks_list.on_epoch_end(epoch, epoch_logs) training_logs = epoch_logs if self.stop_training: break @@ -447,7 +448,7 @@ def fit( # If eval_data_handler exists, delete it after all epochs are done. 
        if getattr(self, "_eval_data_handler", None) is not None:
            del self._eval_data_handler
-        callbacks.on_train_end(logs=training_logs)
+        callbacks_list.on_train_end(logs=training_logs)
         return self.history
diff --git a/rasa/validator.py b/rasa/validator.py
index f91087459994..8f3a1ef6c19f 100644
--- a/rasa/validator.py
+++ b/rasa/validator.py
@@ -16,13 +16,7 @@
 from rasa.shared.core.constants import MAPPING_CONDITIONS, ACTIVE_LOOP
 from rasa.shared.core.events import ActionExecuted, ActiveLoop
 from rasa.shared.core.events import UserUttered
-from rasa.shared.core.domain import (
-    KEY_INTENTS,
-    KEY_RESPONSES,
-    KEY_SLOTS,
-    KEY_FORMS,
-    Domain,
-)
+from rasa.shared.core.domain import Domain
 from rasa.shared.core.generator import TrainingDataGenerator
 from rasa.shared.core.constants import SlotMappingType, MAPPING_TYPE
 from rasa.shared.core.training_data.structures import StoryGraph
@@ -30,6 +24,10 @@
 from rasa.shared.nlu.training_data.training_data import TrainingData
 import rasa.shared.utils.io

+from rasa.shared.core.training_data.structures import (
+    STORY_START,
+)
+

 logger = logging.getLogger(__name__)

@@ -97,6 +95,49 @@ def verify_intents(self, ignore_warnings: bool = True) -> bool:
                 everything_is_alright = False

         return everything_is_alright
+
+    def verify_loop_in_intents(self, ignore_warnings: bool = True) -> bool:
+        """Checks that the checkpoints in stories do not form a loop."""
+        nodes: Dict[Text, List[Text]] = {}
+        loops_cp: List[Text] = []
+
+        everything_is_alright = True
+
+        # Build a graph of checkpoint transitions from the story steps.
+        for story in self.story_graph.story_steps:
+            start_cp = story.start_checkpoints[0].name
+            nodes.setdefault(start_cp, [])
+            if story.end_checkpoints:
+                end_cp = story.end_checkpoints[0].name
+                nodes.setdefault(end_cp, [])
+                nodes[start_cp].append(end_cp)
+
+        # Depth-first search: an edge back to a checkpoint which is still on
+        # the current path is a loop.
+        on_path: Set[Text] = set()
+        done: Set[Text] = set()
+
+        def _find_loops(current: Text) -> None:
+            nonlocal everything_is_alright
+            on_path.add(current)
+            for node in nodes[current]:
+                if node in on_path:
+                    loops_cp.append(f"{current} => {node}")
+                    everything_is_alright = ignore_warnings and everything_is_alright
+                elif node not in done:
+                    _find_loops(node)
+            on_path.remove(current)
+            done.add(current)
+
+        if STORY_START in nodes:
+            _find_loops(STORY_START)
+
+        if loops_cp:
+            rasa.shared.utils.io.raise_warning(
+                f"These checkpoints form a loop: {', '.join(loops_cp)}"
+            )
+
+        return everything_is_alright

     def verify_example_repetition_in_intents(
         self, ignore_warnings: bool = True
@@ -190,6 +231,10 @@ def verify_utterances_in_stories(self, ignore_warnings: bool = True) -> bool:
             for event in story.events:
                 if not isinstance(event, ActionExecuted):
                     continue
+
+                if not event.action_name:
+                    continue
+
                 if not event.action_name.startswith(UTTER_PREFIX):
                     # we are only interested in utter actions
                     continue
@@ -259,6 +304,9 @@ def verify_actions_in_stories_rules(self) -> bool:
             if not isinstance(event, ActionExecuted):
                 continue

+            if not event.action_name:
+                continue
+
             if not event.action_name.startswith("action_"):
                 continue

@@ -326,36 +374,18 @@ def verify_nlu(self, ignore_warnings: bool = True) -> bool:
         there_is_no_duplication = self.verify_example_repetition_in_intents(
             ignore_warnings
         )
+
+        logger.info("Validating that story checkpoints do not form loops...")
+        loop_in_checkpoint = self.verify_loop_in_intents(ignore_warnings)

         logger.info("Validating utterances...")
         stories_are_valid = self.verify_utterances_in_stories(ignore_warnings)
-        return intents_are_valid and stories_are_valid and there_is_no_duplication
-
-    def verify_domain_duplicates(self) -> bool:
-        """Verifies that there are no duplicated dictionaries in multiple domain files.
-
-        Returns:
-            `True` if duplicates exist.
-        """
-        logger.info("Checking duplicates across domain files...")
-
-        all_valid = True
-
-        if not self.domain.duplicates:
-            return True
-
-        for key in [KEY_INTENTS, KEY_FORMS, KEY_RESPONSES, KEY_SLOTS]:
-            duplicates = self.domain.duplicates.get(key)
-            if duplicates:
-                duplicates_str = ", ".join(duplicates)
-                rasa.shared.utils.io.raise_warning(
-                    f"The following duplicated {key} has been found "
-                    + f"across multiple domain files: {duplicates_str}",
-                    docs=DOCS_URL_DOMAINS,
-                )
-                all_valid = False
-
-        return all_valid
+        return (
+            intents_are_valid
+            and stories_are_valid
+            and there_is_no_duplication
+            and loop_in_checkpoint
+        )

     def verify_form_slots(self) -> bool:
         """Verifies that form slots match the slot mappings in domain."""
diff --git a/scripts/release.py b/scripts/release.py
index f23f06152d46..f215f4bd0138 100644
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -285,7 +285,8 @@ def next_version(args: argparse.Namespace) -> Version:

 def generate_changelog(version: Version) -> None:
-    """Call tonwcrier and create a changelog from all available changelog entries."""
+    """Call towncrier and create a changelog from all available changelog entries."""
     check_call(
-        ["towncrier", "--yes", "--version", str(version)], cwd=str(project_root())
+        ["towncrier", "build", "--yes", "--version", str(version)],
+        cwd=str(project_root()),
     )

diff --git a/setup.cfg b/setup.cfg
index 22ed56bddf9b..c729c20197f3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -47,5 +47,4 @@ disallow_untyped_decorators = True
 # FIXME: working our way towards removing these
 # see https://github.com/RasaHQ/rasa/pull/6470
 # the list below is sorted by the number of errors for each error code, in decreasing order
-disable_error_code = arg-type, assignment, var-annotated, union-attr,
-    override, attr-defined, misc
+disable_error_code = arg-type, assignment, override, misc
diff --git a/stubs/redis/__init__.py b/stubs/redis/__init__.py
deleted file mode 100644
index bd0758a85a8c..000000000000
--- a/stubs/redis/__init__.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from typing import Text, List, overload, Optional, Union, Mapping, Literal
-
-from redis import ConnectionPool
-from typing_extensions import Protocol
-
-# We should switch to https://pypi.org/project/types-redis/ once
-# https://github.com/python/typeshed/issues/5065 is fixed.
-class StrictRedis(Protocol):
-    @overload
-    def __init__(
-        self,
-        host: Text = ...,
-        port: int = ...,
-        db: int = ...,
-        password: Optional[Text] = ...,
-        socket_timeout: Optional[float] = ...,
-        socket_connect_timeout: Optional[float] = ...,
-        socket_keepalive: Optional[bool] = ...,
-        socket_keepalive_options: Optional[Mapping[str, Union[int, str]]] = ...,
-        connection_pool: Optional[ConnectionPool] = ...,
-        unix_socket_path: Optional[Text] = ...,
-        encoding: Text = ...,
-        encoding_errors: Text = ...,
-        charset: Optional[Text] = ...,
-        errors: Optional[Text] = ...,
-        decode_responses: Literal[False] = ...,
-        retry_on_timeout: bool = ...,
-        ssl: bool = ...,
-        ssl_keyfile: Optional[Text] = ...,
-        ssl_certfile: Optional[Text] = ...,
-        ssl_cert_reqs: Optional[Union[str, int]] = ...,
-        ssl_ca_certs: Optional[Text] = ...,
-        ssl_check_hostname: bool = ...,
-        max_connections: Optional[int] = ...,
-        single_connection_client: bool = ...,
-        health_check_interval: float = ...,
-        client_name: Optional[Text] = ...,
-        username: Optional[Text] = ...,
-    ) -> None:
-        ...
-
-    def keys(self, pattern: Text) -> List[Text]:
-        ...
diff --git a/stubs/sanic/__init__.pyi b/stubs/sanic/__init__.pyi
index cd253e0281fd..610d5112a38e 100644
--- a/stubs/sanic/__init__.pyi
+++ b/stubs/sanic/__init__.pyi
@@ -1,4 +1,18 @@
 from sanic.__version__ import __version__
 from sanic.app import Sanic
+from sanic.blueprints import Blueprint
+from sanic.constants import HTTPMethod
+from sanic.request import Request
+from sanic.response import HTTPResponse, html, json, text

-__all__ = ["Sanic", "__version__"]
+__all__ = [
+    "__version__",
+    "Sanic",
+    "Blueprint",
+    "HTTPMethod",
+    "HTTPResponse",
+    "Request",
+    "html",
+    "json",
+    "text",
+]
diff --git a/stubs/sanic/app.pyi b/stubs/sanic/app.pyi
index 7e69b7a91abe..7f7b4154e0ec 100644
--- a/stubs/sanic/app.pyi
+++ b/stubs/sanic/app.pyi
@@ -1,7 +1,6 @@
-from sanic.app import Sanic as SanicSanic
-
+# mypy check fails here but it actually successfully loads the initial module
+# so it's probably an internal issue of mypy with no repercussions
+from sanic.app import Sanic as SanicSanic  # type: ignore[attr-defined]

 class Sanic(SanicSanic):
-
-    def stop(self) -> None:
-        ...
+    def stop(self) -> None: ...
diff --git a/stubs/sanic/blueprints.pyi b/stubs/sanic/blueprints.pyi
new file mode 100644
index 000000000000..7160108c3b6c
--- /dev/null
+++ b/stubs/sanic/blueprints.pyi
@@ -0,0 +1,10 @@
+from typing import Any, Dict, Text
+
+from sanic.app import Sanic
+
+# mypy check fails here but it actually successfully loads the initial module
+# so it's probably an internal issue of mypy with no repercussions
+from sanic.blueprints import Blueprint as SanicBlueprint  # type: ignore[attr-defined]
+
+class Blueprint(SanicBlueprint):
+    def register(self, app: Sanic, options: Dict[Text, Any]) -> None: ...
diff --git a/stubs/sanic/exceptions.pyi b/stubs/sanic/exceptions.pyi
deleted file mode 100644
index e7f461144d50..000000000000
--- a/stubs/sanic/exceptions.pyi
+++ /dev/null
@@ -1,3 +0,0 @@
-from typing import NoReturn, Optional, Text
-
-def abort(status_code: int, message: Optional[Text] = None) -> NoReturn: ...
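
Every test diff that follows replaces a hardcoded `version: "3.0"` header with the `LATEST_TRAINING_DATA_FORMAT_VERSION` constant from `rasa.shared.constants`. The substitution only happens when the literal carries the `f` prefix; in a plain triple-quoted string the braces survive verbatim and the YAML keeps an unresolved placeholder instead of a real version number. A small self-contained sketch of the difference (only the constant is taken from the diff; the story payload is made up):

from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION

# f-string: the placeholder is interpolated, e.g. `version: "3.1"`.
interpolated = f"""
version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
stories: []
"""

# Plain string: the braces are kept literally, which defeats the point of
# tracking the latest training data format version in one place.
literal = """
version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
stories: []
"""

assert LATEST_TRAINING_DATA_FORMAT_VERSION in interpolated
assert "{LATEST_TRAINING_DATA_FORMAT_VERSION}" in literal
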
diff --git a/tests/cli/test_rasa_data.py b/tests/cli/test_rasa_data.py
index fd671ef95cd3..700d2beaf1e9 100644
--- a/tests/cli/test_rasa_data.py
+++ b/tests/cli/test_rasa_data.py
@@ -9,6 +9,7 @@
 from _pytest.monkeypatch import MonkeyPatch
 from _pytest.pytester import RunResult
 from rasa.cli import data
+from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
 from rasa.shared.importers.importer import TrainingDataImporter
 from rasa.validator import Validator
 import rasa.shared.utils.io
@@ -156,7 +157,7 @@ def test_validate_files_action_not_found_invalid_domain(
     file_name = tmp_path / f"{file_type}.yml"
     file_name.write_text(
         f"""
-        version: "3.0"
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         {file_type}:
         - {data_type}: test path
           steps:
@@ -183,7 +184,7 @@ def test_validate_files_form_not_found_invalid_domain(
     file_name = tmp_path / f"{file_type}.yml"
     file_name.write_text(
         f"""
-        version: "3.0"
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         {file_type}:
         - {data_type}: test path
          steps:
@@ -229,8 +230,8 @@ def test_validate_files_with_active_loop_null(tmp_path: Path):
 def test_validate_files_form_slots_not_matching(tmp_path: Path):
     domain_file_name = tmp_path / "domain.yml"
     domain_file_name.write_text(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         forms:
           name_form:
             required_slots:
@@ -290,7 +291,7 @@ def test_validate_files_invalid_slot_mappings(tmp_path: Path):
     slot_name = "started_booking_form"
     domain.write_text(
         f"""
-        version: "3.0"
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         intents:
         - activate_booking
         entities:
diff --git a/tests/cli/test_rasa_test.py b/tests/cli/test_rasa_test.py
index 0f8476f404d2..947297701389 100644
--- a/tests/cli/test_rasa_test.py
+++ b/tests/cli/test_rasa_test.py
@@ -47,7 +47,7 @@ def test_test_core_warnings(run_in_simple_project_with_model: Callable[..., RunR
 )

-simple_test_story_yaml = """
-version: "3.0"
+simple_test_story_yaml = f"""
+version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 stories:
 - story: unlikely path
   steps:
diff --git a/tests/core/actions/test_forms.py b/tests/core/actions/test_forms.py
index 335e7fd2fede..ee8714d15d90 100644
--- a/tests/core/actions/test_forms.py
+++ b/tests/core/actions/test_forms.py
@@ -10,7 +10,11 @@
 from rasa.core.policies.policy import PolicyPrediction
 from rasa.core.actions import action
 from rasa.core.actions.action import ActionExecutionRejection, ActionExtractSlots
-from rasa.shared.constants import REQUIRED_SLOTS_KEY, IGNORED_INTENTS
+from rasa.shared.constants import (
+    LATEST_TRAINING_DATA_FORMAT_VERSION,
+    REQUIRED_SLOTS_KEY,
+    IGNORED_INTENTS,
+)
 from rasa.shared.core.constants import ACTION_LISTEN_NAME, REQUESTED_SLOT
 from rasa.core.actions.forms import FormAction
 from rasa.core.channels import CollectingOutputChannel
@@ -119,7 +123,7 @@ async def test_switch_forms_with_same_slot(default_agent: Agent):
     utter_ask_form_2 = f"Please provide the value for {slot_a} of form 2"

     domain = f"""
-version: "3.0"
+version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 nlu:
 - intent: order_status
   examples: |
@@ -448,7 +452,7 @@ async def test_validate_slots(
     tracker = DialogueStateTracker.from_events(sender_id="bla", evts=events)

     domain = f"""
-    version: "3.0"
+    version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
     entities:
     - num_tables

@@ -722,7 +726,7 @@ def test_temporary_tracker():
     sender_id = "test"
     domain = Domain.from_yaml(
         f"""
-        version: "3.0"
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         slots:
           {extra_slot}:
             type: any
@@ -1407,8 +1411,8 @@ async def test_extract_other_slots_with_matched_mapping_conditions():
     domain = Domain.from_yaml(
         textwrap.dedent(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             intent:
             - greet
             - inform
@@ -1479,8 +1483,8 @@ async def test_extract_other_slots_raises_no_matched_conditions():

     domain = Domain.from_yaml(
         textwrap.dedent(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             intent:
             - greet
             - inform
@@ -1549,8 +1553,8 @@ async def test_extract_other_slots_raises_no_matched_conditions():

 async def test_action_extract_slots_custom_mapping_with_condition():
     domain_yaml = textwrap.dedent(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"

         slots:
           custom_slot:
@@ -1613,8 +1617,8 @@ async def test_action_extract_slots_custom_mapping_with_condition():
 async def test_form_slots_empty_with_restart():
     domain = Domain.from_yaml(
         textwrap.dedent(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             intent:
             - greet
             - inform
diff --git a/tests/core/actions/test_two_stage_fallback.py b/tests/core/actions/test_two_stage_fallback.py
index d98604891d75..4b916fd5b0d9 100644
--- a/tests/core/actions/test_two_stage_fallback.py
+++ b/tests/core/actions/test_two_stage_fallback.py
@@ -4,7 +4,10 @@
 from rasa.core.policies.policy import PolicyPrediction
 from rasa.core.processor import MessageProcessor
-from rasa.shared.constants import DEFAULT_NLU_FALLBACK_INTENT_NAME
+from rasa.shared.constants import (
+    DEFAULT_NLU_FALLBACK_INTENT_NAME,
+    LATEST_TRAINING_DATA_FORMAT_VERSION,
+)
 from rasa.core.actions.two_stage_fallback import TwoStageFallbackAction
 from rasa.core.channels import CollectingOutputChannel
 from rasa.shared.core.domain import Domain
@@ -156,7 +159,7 @@ async def test_ask_rephrase_after_failed_affirmation():

     domain = Domain.from_yaml(
         f"""
-        version: "3.0"
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         responses:
             utter_ask_rephrase:
             - text: {rephrase_text}
diff --git a/tests/core/evaluation/test_marker.py b/tests/core/evaluation/test_marker.py
index ea4536fcb2ff..1dc88c095ba2 100644
--- a/tests/core/evaluation/test_marker.py
+++ b/tests/core/evaluation/test_marker.py
@@ -568,7 +568,7 @@ def test_domain_validation_with_valid_marker(depth: int, max_branches: int, seed
     slots = [Slot(name, []) for name in _collect_parameters(marker, SlotSetMarker)]
     actions = list(_collect_parameters(marker, ActionExecutedMarker))
     intents = _collect_parameters(marker, IntentDetectedMarker)
-    domain = Domain(intents, [], slots, {}, actions, {})
+    domain = Domain(intents, [], slots, {}, actions, {}, {})

     assert marker.validate_against_domain(domain)

diff --git a/tests/core/featurizers/test_precomputation.py b/tests/core/featurizers/test_precomputation.py
index 4fef7d71a515..2ab4880d851c 100644
--- a/tests/core/featurizers/test_precomputation.py
+++ b/tests/core/featurizers/test_precomputation.py
@@ -353,6 +353,7 @@ def test_container_derive_messages_from_domain_and_add():
         entities=["e_a", "e_b", "e_c"],
         slots=[Slot(name="s", mappings=[{}])],
         forms=forms,
+        data={},
     )
     lookup_table = MessageContainerForCoreFeaturization()
     lookup_table.derive_messages_from_domain_and_add(domain)
@@ -382,6 +383,7 @@ def test_converter_for_training(input_converter: CoreFeaturizationInputConverter
         responses=dict(),
         action_names=["action_listen", "utter_greet"],
         forms=dict(),
+        data={},
         action_texts=["Hi how are you?"],
     )
     events = [
diff --git a/tests/core/featurizers/test_single_state_featurizers.py b/tests/core/featurizers/test_single_state_featurizers.py
b/tests/core/featurizers/test_single_state_featurizers.py
index cf84cf55e194..8293f79216d3 100644
--- a/tests/core/featurizers/test_single_state_featurizers.py
+++ b/tests/core/featurizers/test_single_state_featurizers.py
@@ -93,6 +93,7 @@ def test_prepare_for_training():
         responses={},
         forms={},
         action_names=["utter_greet", "action_check_weather"],
+        data={},
     )
 
     f = SingleStateFeaturizer()
@@ -125,6 +126,7 @@ def test_encode_all_labels__encoded_all_action_names_and_texts():
         responses={},
         forms={},
         action_names=["a", "b", "c", "d"],
+        data={},
     )
 
     f = SingleStateFeaturizer()
@@ -452,6 +454,7 @@ def test_encode_entities__with_entity_roles_and_groups():
         responses={},
         forms={},
         action_names=[],
+        data={},
     )
     f = SingleStateFeaturizer()
     f.prepare_for_training(domain)
@@ -484,6 +487,7 @@ def test_encode_entities__with_bilou_entity_roles_and_groups():
         responses={},
         forms={},
         action_names=[],
+        data={},
     )
     f = SingleStateFeaturizer()
     f.prepare_for_training(domain, bilou_tagging=True)
diff --git a/tests/core/nlg/test_response.py b/tests/core/nlg/test_response.py
index 2952f75dbbca..7db9984dd710 100644
--- a/tests/core/nlg/test_response.py
+++ b/tests/core/nlg/test_response.py
@@ -5,6 +5,7 @@ from _pytest.logging import LogCaptureFixture
 
 from rasa.core.nlg.response import TemplatedNaturalLanguageGenerator
+from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
 from rasa.shared.core.domain import Domain
 from rasa.shared.core.slots import TextSlot, AnySlot, CategoricalSlot, BooleanSlot
 from rasa.shared.core.trackers import DialogueStateTracker
@@ -250,7 +251,7 @@ async def test_nlg_conditional_response_variations_with_diff_slot_types(
 async def test_nlg_non_matching_channel():
     domain = Domain.from_yaml(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         responses:
           utter_hi:
           - text: "Hello"
@@ -266,8 +267,8 @@ async def test_nlg_non_matching_channel():
 async def test_nlg_conditional_response_variations_with_none_slot():
     domain = Domain.from_yaml(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         responses:
           utter_action:
           - text: "text A"
@@ -288,8 +289,8 @@ async def test_nlg_conditional_response_variations_with_none_slot():
 async def test_nlg_conditional_response_variations_with_slot_not_a_constraint():
     domain = Domain.from_yaml(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         responses:
           utter_action:
           - text: "text A"
@@ -310,8 +311,8 @@ async def test_nlg_conditional_response_variations_with_slot_not_a_constraint():
 async def test_nlg_conditional_response_variations_with_null_slot():
     domain = Domain.from_yaml(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         responses:
           utter_action:
           - text: "text for null"
@@ -336,8 +337,8 @@ async def test_nlg_conditional_response_variations_with_null_slot():
 async def test_nlg_conditional_response_variations_channel_no_condition_met():
     domain = Domain.from_yaml(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         responses:
           utter_action:
           - text: "example with channel"
@@ -357,8 +358,8 @@ async def test_nlg_conditional_response_variations_channel_no_condition_met():
 async def test_nlg_conditional_response_variation_condition_met_channel_mismatch():
     domain = Domain.from_yaml(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         responses:
           utter_action:
           - text: "example with channel"
@@ -423,8 +424,8 @@ async def test_nlg_conditional_response_variation_condition_met_channel_mismatch
 )
async def test_nlg_conditional_edgecases(slots, channel, expected_response): domain = Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" responses: utter_action: - text: "condition example A with channel" @@ -466,8 +467,8 @@ async def test_nlg_conditional_response_variations_condition_logging( caplog: LogCaptureFixture, ): domain = Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" responses: utter_action: - text: "example" diff --git a/tests/core/policies/test_rule_policy.py b/tests/core/policies/test_rule_policy.py index 5d075642d4ed..51f93f0f02af 100644 --- a/tests/core/policies/test_rule_policy.py +++ b/tests/core/policies/test_rule_policy.py @@ -7,7 +7,10 @@ from rasa.engine.graph import ExecutionContext from rasa.engine.storage.resource import Resource from rasa.engine.storage.storage import ModelStorage -from rasa.shared.constants import DEFAULT_NLU_FALLBACK_INTENT_NAME +from rasa.shared.constants import ( + DEFAULT_NLU_FALLBACK_INTENT_NAME, + LATEST_TRAINING_DATA_FORMAT_VERSION, +) from rasa.core import training from rasa.core.actions.action import ActionDefaultFallback @@ -149,7 +152,7 @@ def test_potential_contradiction_resolved_by_conversation_start(policy: RulePoli utter_anti_greet_action = "utter_anti_greet" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -202,7 +205,7 @@ def test_potential_contradiction_resolved_by_conversation_start_when_slot_initia some_slot_initial_value = "slot1value" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -275,7 +278,7 @@ def test_potential_contradiction_resolved_by_conversation_start_when_slot_initia some_slot_initial_value = "slot1value" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -340,7 +343,7 @@ def test_potential_contradiction_resolved_by_conversation_start_when_slot_initia def test_restrict_multiple_user_inputs_in_rules(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -372,7 +375,7 @@ def test_incomplete_rules_due_to_slots(policy: RulePolicy): some_slot = "some_slot" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -442,7 +445,7 @@ def test_no_incomplete_rules_due_to_slots_after_listen(policy: RulePolicy): some_slot = "some_slot" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -504,7 +507,7 @@ def test_no_incomplete_rules_due_to_additional_slots_set(policy: RulePolicy): some_other_slot_value = "value2" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -559,7 +562,7 @@ def test_incomplete_rules_due_to_loops(policy: RulePolicy): some_form = "some_form" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} forms: @@ -624,7 +627,7 @@ def test_contradicting_rules(policy: RulePolicy): utter_anti_greet_action = "utter_anti_greet" domain = Domain.from_yaml( f""" - version: "3.0" + version: 
"{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -664,7 +667,7 @@ def test_contradicting_rules_and_stories(policy: RulePolicy): utter_anti_greet_action = "utter_anti_greet" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -808,7 +811,7 @@ def test_rule_policy_contradicting_rule_finetune( def test_faq_rule(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -835,7 +838,7 @@ async def test_predict_form_action_if_in_form(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -880,7 +883,7 @@ async def test_predict_loop_action_if_in_loop_but_there_is_e2e_rule(policy: Rule domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -936,7 +939,7 @@ async def test_predict_form_action_if_multiple_turns(policy: RulePolicy): other_intent = "bye" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} - {other_intent} @@ -1085,7 +1088,7 @@ async def test_predict_action_listen_after_form(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1132,7 +1135,7 @@ async def test_dont_predict_form_if_already_finished(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1183,7 +1186,7 @@ async def test_form_unhappy_path(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1228,7 +1231,7 @@ async def test_form_unhappy_path_from_general_rule(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1285,7 +1288,7 @@ async def test_form_unhappy_path_from_in_form_rule(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1361,7 +1364,7 @@ async def test_form_unhappy_path_from_story(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1438,7 +1441,7 @@ async def test_form_unhappy_path_no_validation_from_rule( domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1529,7 +1532,7 @@ async def test_form_unhappy_path_no_validation_from_story(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1599,7 +1602,7 @@ async def test_form_unhappy_path_without_rule(policy: RulePolicy): other_intent = "bye" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} - {other_intent} @@ -1645,7 +1648,7 @@ async def test_form_activation_rule(policy: RulePolicy): other_intent = "bye" domain = Domain.from_yaml( 
f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} - {other_intent} @@ -1686,7 +1689,7 @@ async def test_failing_form_activation_due_to_no_rule(policy: RulePolicy): other_intent = "bye" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} - {other_intent} @@ -1727,7 +1730,7 @@ def test_form_submit_rule(policy: RulePolicy): submit_action_name = "utter_submit" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1781,7 +1784,7 @@ def test_immediate_submit(policy: RulePolicy): slot = "some_slot" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1887,7 +1890,7 @@ async def test_rule_policy_slot_filling_from_text( async def test_one_stage_fallback_rule(policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} - {DEFAULT_NLU_FALLBACK_INTENT_NAME} @@ -1970,7 +1973,7 @@ def test_default_actions( ): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -1998,7 +2001,7 @@ def test_default_actions( def test_e2e_beats_default_actions(intent_name: Text, policy: RulePolicy): domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} actions: @@ -2059,7 +2062,7 @@ def test_predict_core_fallback( other_intent = "other" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} - {other_intent} @@ -2092,7 +2095,7 @@ def test_predict_nothing_if_fallback_disabled( other_intent = "other" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} - {other_intent} @@ -2119,7 +2122,7 @@ def test_hide_rule_turn(policy: RulePolicy): action_chitchat = "action_chitchat" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} - {chitchat} @@ -2199,7 +2202,7 @@ def test_hide_rule_turn_with_slots( some_other_slot_value = "value2" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {some_intent} - {some_other_intent} @@ -2326,7 +2329,7 @@ def test_hide_rule_turn_no_last_action_listen( followup_on_chitchat = "followup_on_chitchat" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {chitchat} actions: @@ -2414,7 +2417,7 @@ def test_hide_rule_turn_with_loops( action_chitchat = "action_chitchat" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {GREET_INTENT_NAME} - {activate_form} @@ -2522,7 +2525,7 @@ def test_do_not_hide_rule_turn_with_loops_in_stories(policy: RulePolicy): activate_form = "activate_form" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {activate_form} slots: @@ -2577,7 +2580,7 @@ def test_hide_rule_turn_with_loops_as_followup_action(policy: RulePolicy): activate_form = "activate_form" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: 
- {GREET_INTENT_NAME} - {activate_form} @@ -2681,7 +2684,7 @@ def test_remove_action_listen_prediction_if_contradicts_with_story(policy: RuleP utter_2 = "utter_2" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {intent_1} actions: @@ -2726,7 +2729,7 @@ def test_keep_action_listen_prediction_after_predictable_action(policy: RulePoli utter_3 = "utter_3" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {intent_1} actions: @@ -2772,7 +2775,7 @@ def test_keep_action_listen_prediction_if_last_prediction(policy: RulePolicy): utter_2 = "utter_2" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {intent_1} actions: @@ -2813,7 +2816,7 @@ def test_keep_action_listen_prediction_if_contradicts_with_rule(policy: RulePoli utter_2 = "utter_2" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {intent_1} actions: @@ -2856,7 +2859,7 @@ def test_raise_contradiction_if_rule_contradicts_with_story(policy: RulePolicy): utter_2 = "utter_2" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {intent_1} actions: @@ -2898,7 +2901,7 @@ def test_rule_with_multiple_entities(policy: RulePolicy): utter_1 = "utter_1" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {intent_1} entities: @@ -2962,7 +2965,7 @@ def test_rule_with_multiple_slots(policy: RulePolicy): slot_2 = "slot_2" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {intent_1} actions: @@ -3030,7 +3033,7 @@ def test_include_action_unlikely_intent(policy: RulePolicy): slot_2 = "slot_2" domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {intent_1} actions: diff --git a/tests/core/policies/test_ted_policy.py b/tests/core/policies/test_ted_policy.py index 93a10b6d6914..8131006ab8c3 100644 --- a/tests/core/policies/test_ted_policy.py +++ b/tests/core/policies/test_ted_policy.py @@ -55,7 +55,7 @@ from rasa.shared.nlu.constants import ACTION_NAME from rasa.utils.tensorflow import model_data_utils from tests.core.test_policies import PolicyTestCollection -from rasa.shared.constants import DEFAULT_SENDER_ID +from rasa.shared.constants import DEFAULT_SENDER_ID, LATEST_TRAINING_DATA_FORMAT_VERSION UTTER_GREET_ACTION = "utter_greet" GREET_INTENT_NAME = "greet" @@ -226,8 +226,8 @@ def test_training_with_no_intent( ): stories = tmp_path / "stories.yml" stories.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: test path steps: diff --git a/tests/core/policies/test_unexpected_intent_policy.py b/tests/core/policies/test_unexpected_intent_policy.py index 07796d2d8daa..ec598015b048 100644 --- a/tests/core/policies/test_unexpected_intent_policy.py +++ b/tests/core/policies/test_unexpected_intent_policy.py @@ -14,6 +14,7 @@ from rasa.core.featurizers.tracker_featurizers import TrackerFeaturizer from rasa.core.featurizers.tracker_featurizers import IntentMaxHistoryTrackerFeaturizer from rasa.nlu.classifiers import LABEL_RANKING_LENGTH +from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION from rasa.shared.core.generator import TrackerWithCachedStates from rasa.core.policies.ted_policy import PREDICTION_FEATURES from 
rasa.core.policies.unexpected_intent_policy import UnexpecTEDIntentPolicy @@ -138,8 +139,8 @@ def test_training_with_no_intent( ): stories = tmp_path / "stories.yml" stories.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: test path steps: diff --git a/tests/core/test_actions.py b/tests/core/test_actions.py index f5832dca7810..6089f09f0c51 100644 --- a/tests/core/test_actions.py +++ b/tests/core/test_actions.py @@ -27,7 +27,11 @@ from rasa.core.actions.forms import FormAction from rasa.core.channels import CollectingOutputChannel, OutputChannel from rasa.core.nlg import NaturalLanguageGenerator -from rasa.shared.constants import UTTER_PREFIX, REQUIRED_SLOTS_KEY +from rasa.shared.constants import ( + LATEST_TRAINING_DATA_FORMAT_VERSION, + UTTER_PREFIX, + REQUIRED_SLOTS_KEY, +) from rasa.shared.core.domain import ( ActionNotFoundException, SessionConfig, @@ -122,6 +126,7 @@ def test_domain_action_instantiation(): responses={}, action_names=["my_module.ActionTest", "utter_test", "utter_chitchat"], forms={}, + data={}, ) instantiated_actions = [ @@ -1146,8 +1151,8 @@ async def test_action_extract_slots_predefined_mappings( ): domain = Domain.from_yaml( textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - inform - greet @@ -1229,8 +1234,8 @@ async def test_action_extract_slots_predefined_mappings( async def test_action_extract_slots_with_from_trigger_mappings(): domain = Domain.from_yaml( textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - greet - inform @@ -1350,7 +1355,7 @@ async def test_action_extract_slots_with_list_slot( domain = Domain.from_yaml( textwrap.dedent( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" entities: - topping @@ -1449,7 +1454,7 @@ async def test_action_extract_slots_with_matched_mapping_condition(): domain = Domain.from_yaml( textwrap.dedent( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intent: - greet - inform @@ -1502,7 +1507,7 @@ async def test_action_extract_slots_no_matched_mapping_conditions(): domain = Domain.from_yaml( textwrap.dedent( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intent: - greet - inform @@ -1746,7 +1751,7 @@ async def test_extract_other_list_slot_from_entity( domain = Domain.from_yaml( textwrap.dedent( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" entities: - topping @@ -1965,8 +1970,8 @@ async def test_action_extract_slots_execute_validation_action( expected_events: List[Event], ): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - greet @@ -2029,8 +2034,8 @@ async def test_action_extract_slots_execute_validation_action( async def test_action_extract_slots_custom_action_and_predefined_slot_validation(): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - inform @@ -2096,8 +2101,8 @@ async def test_action_extract_slots_custom_action_and_predefined_slot_validation async def test_action_extract_slots_with_duplicate_custom_actions(): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - inform @@ -2168,8 +2173,8 @@ async def test_action_extract_slots_with_duplicate_custom_actions(): async def test_action_extract_slots_disallowed_events(caplog: 
LogCaptureFixture): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" slots: custom_slot_one: @@ -2232,8 +2237,8 @@ async def test_action_extract_slots_warns_custom_action_exceptions( caplog: LogCaptureFixture, exception: Exception ): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" slots: custom_slot_one: @@ -2278,8 +2283,8 @@ async def test_action_extract_slots_warns_custom_action_exceptions( async def test_action_extract_slots_with_empty_conditions(): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" entities: - city @@ -2312,8 +2317,8 @@ async def test_action_extract_slots_with_empty_conditions(): async def test_action_extract_slots_with_not_existing_entity(): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" entities: - city @@ -2350,8 +2355,8 @@ async def test_action_extract_slots_with_not_existing_entity(): async def test_action_extract_slots_with_not_existing_intent(): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - greet @@ -2389,8 +2394,8 @@ async def test_action_extract_slots_with_not_existing_intent(): async def test_action_extract_slots_with_none_value_predefined_mapping(): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" entities: - some_entity @@ -2430,8 +2435,8 @@ async def test_action_extract_slots_with_none_value_predefined_mapping(): async def test_action_extract_slots_with_none_value_custom_mapping(): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" slots: custom_slot: @@ -2473,8 +2478,8 @@ async def test_action_extract_slots_with_none_value_custom_mapping(): async def test_action_extract_slots_returns_bot_uttered(): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" slots: custom_slot: @@ -2521,8 +2526,8 @@ async def test_action_extract_slots_does_not_raise_disallowed_warning_for_slot_e caplog: LogCaptureFixture, ): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" slots: custom_slot_a: @@ -2590,8 +2595,8 @@ async def test_action_extract_slots_does_not_raise_disallowed_warning_for_slot_e async def test_action_extract_slots_non_required_form_slot_with_from_entity_mapping(): domain_yaml = textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - form_start diff --git a/tests/core/test_evaluation.py b/tests/core/test_evaluation.py index 4c624d66c542..a56068a84514 100644 --- a/tests/core/test_evaluation.py +++ b/tests/core/test_evaluation.py @@ -184,7 +184,7 @@ async def test_end_to_evaluation_trips_circuit_breaker( ): config = textwrap.dedent( f""" - version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}' + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" policies: - name: MemoizationPolicy max_history: 11 diff --git a/tests/core/test_migrate.py b/tests/core/test_migrate.py index a241a4f55d21..cc68983bc774 100644 --- a/tests/core/test_migrate.py +++ b/tests/core/test_migrate.py @@ -12,6 +12,7 @@ from rasa.core import migrate from rasa.shared.core.domain import Domain from rasa.shared.exceptions import RasaException +from rasa.shared.constants import 
LATEST_TRAINING_DATA_FORMAT_VERSION def prepare_domain_path(directory: Path, domain_content: Text, file_name: Text) -> Path: @@ -80,7 +81,7 @@ def test_migrate_domain_format_with_required_slots( migrated_domain = rasa.shared.utils.io.read_yaml_file(domain_out_file) migrated_training_data_version = migrated_domain.get("version") - assert migrated_training_data_version == '"3.0"' + assert migrated_training_data_version == LATEST_TRAINING_DATA_FORMAT_VERSION migrated_slots = migrated_domain.get("slots") expected_slots = { @@ -348,7 +349,7 @@ def test_migrate_domain_format_from_dir(tmp_path: Path): migrated_file = rasa.shared.utils.io.read_yaml_file(file) migrated_training_data_version = migrated_file.get("version") - assert migrated_training_data_version == '"3.0"' + assert migrated_training_data_version == LATEST_TRAINING_DATA_FORMAT_VERSION def test_migrate_domain_all_keys(tmp_path: Path, domain_out_file: Path): @@ -396,7 +397,7 @@ def test_migrate_domain_all_keys(tmp_path: Path, domain_out_file: Path): assert "action_check_time" in migrated_actions migrated_training_data_version = migrated_domain.get("version") - assert migrated_training_data_version == '"3.0"' + assert migrated_training_data_version == LATEST_TRAINING_DATA_FORMAT_VERSION def test_migrate_domain_format_with_custom_slot(tmp_path: Path, domain_out_file: Path): @@ -767,7 +768,7 @@ def test_migrate_domain_from_dir_with_other_sections(tmp_path: Path): migrated = rasa.shared.utils.io.read_yaml_file(file) migrated_training_data_version = migrated.get("version") - assert migrated_training_data_version == '"3.0"' + assert migrated_training_data_version == LATEST_TRAINING_DATA_FORMAT_VERSION if file.name == domain_file_one: assert migrated.get("entities") == ["outdoor"] diff --git a/tests/core/test_processor.py b/tests/core/test_processor.py index 309acd749e8c..2e138733c3c1 100644 --- a/tests/core/test_processor.py +++ b/tests/core/test_processor.py @@ -1184,6 +1184,7 @@ async def test_logging_of_end_to_end_action( action_names=[], forms={}, action_texts=[end_to_end_action], + data={}, ) default_processor.domain = new_domain @@ -1247,7 +1248,7 @@ async def test_predict_next_action_with_hidden_rules( story_slot = "story_slot" domain_content = textwrap.dedent( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - {rule_intent} - {story_intent} diff --git a/tests/core/test_test.py b/tests/core/test_test.py index 571837b991c0..8f9aaaa23a87 100644 --- a/tests/core/test_test.py +++ b/tests/core/test_test.py @@ -248,8 +248,8 @@ async def test_action_unlikely_intent_warning( file_name = tmp_path / "test_action_unlikely_intent_1.yml" file_name.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: unlikely path steps: @@ -297,8 +297,8 @@ async def test_action_unlikely_intent_correctly_predicted( file_name = tmp_path / "test_action_unlikely_intent_2.yml" file_name.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: unlikely path (with action_unlikely_intent) steps: @@ -342,8 +342,8 @@ async def test_wrong_action_after_action_unlikely_intent( test_file_name = tmp_path / "test.yml" test_file_name.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: happy path steps: @@ -360,8 +360,8 @@ async def test_wrong_action_after_action_unlikely_intent( train_file_name = tmp_path / "train.yml" train_file_name.write_text( - """ - version: "3.0" + f""" + 
version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: happy path steps: @@ -411,8 +411,8 @@ async def test_action_unlikely_intent_not_found( ): test_file_name = tmp_path / "test_action_unlikely_intent_complete.yml" test_file_name.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: happy path steps: @@ -430,8 +430,8 @@ async def test_action_unlikely_intent_not_found( train_file_name = tmp_path / "train_without_action_unlikely_intent.yml" train_file_name.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: happy path steps: @@ -478,8 +478,8 @@ async def test_action_unlikely_intent_warning_and_story_error( test_file_name = tmp_path / "test.yml" test_file_name.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: happy path steps: @@ -496,8 +496,8 @@ async def test_action_unlikely_intent_warning_and_story_error( train_file_name = tmp_path / "train.yml" train_file_name.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: happy path steps: @@ -545,8 +545,8 @@ async def test_fail_on_prediction_errors( file_name = tmp_path / "test_action_unlikely_intent_2.yml" file_name.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" stories: - story: unlikely path (with action_unlikely_intent) steps: diff --git a/tests/core/training/test_interactive.py b/tests/core/training/test_interactive.py index ad25591b81b8..e2e9624976e9 100644 --- a/tests/core/training/test_interactive.py +++ b/tests/core/training/test_interactive.py @@ -21,6 +21,7 @@ INTENT_MESSAGE_PREFIX, DEFAULT_SENDER_ID, DOCS_URL_POLICIES, + LATEST_TRAINING_DATA_FORMAT_VERSION, ) from rasa.shared.core.constants import ACTION_LISTEN_NAME, ACTION_UNLIKELY_INTENT_NAME from rasa.shared.core.domain import Domain @@ -573,7 +574,7 @@ async def test_write_domain_to_file_with_form(tmp_path: Path): form_name = "my_form" old_domain = Domain.from_yaml( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" actions: - utter_greet - utter_goodbye diff --git a/tests/examples/test_example_bots_training_data.py b/tests/examples/test_example_bots_training_data.py index eeb34b465598..e6d0b9ca1930 100644 --- a/tests/examples/test_example_bots_training_data.py +++ b/tests/examples/test_example_bots_training_data.py @@ -8,59 +8,76 @@ @pytest.mark.parametrize( - "config_file, domain_file, data_folder", + "config_file, domain_file, data_folder, raise_slot_warning", [ ( "examples/concertbot/config.yml", "examples/concertbot/domain.yml", "examples/concertbot/data", + True, ), ( "examples/formbot/config.yml", "examples/formbot/domain.yml", "examples/formbot/data", + True, ), ( "examples/knowledgebasebot/config.yml", "examples/knowledgebasebot/domain.yml", "examples/knowledgebasebot/data", + True, ), ( "data/test_moodbot/config.yml", "data/test_moodbot/domain.yml", "data/test_moodbot/data", + False, ), ( "examples/reminderbot/config.yml", "examples/reminderbot/domain.yml", "examples/reminderbot/data", + True, ), ( "examples/rules/config.yml", "examples/rules/domain.yml", "examples/rules/data", + True, ), ], ) def test_example_bot_training_data_raises_only_auto_fill_warning( - config_file: Text, domain_file: Text, data_folder: Text + config_file: Text, + domain_file: Text, + data_folder: Text, + raise_slot_warning: bool, ): importer = TrainingDataImporter.load_from_config( 
config_file, domain_file, [data_folder] ) - with pytest.warns(UserWarning) as record: - importer.get_nlu_data() - importer.get_stories() + if raise_slot_warning: + with pytest.warns(UserWarning) as record: + importer.get_nlu_data() + importer.get_stories() - # two for slot auto-fill removal - assert len(record) == 2 - assert ( - "Slot auto-fill has been removed in 3.0 and replaced with " - "a new explicit mechanism to set slots." in record[0].message.args[0] - ) - assert record[0].message.args[0] == record[1].message.args[0] + assert len(record) == 2 + assert all( + [ + "Slot auto-fill has been removed in 3.0 and replaced with " + "a new explicit mechanism to set slots." in r.message.args[0] + for r in record + ] + ) + else: + with pytest.warns(None) as record: + importer.get_nlu_data() + importer.get_stories() + + assert len(record) == 0 def test_example_bot_training_on_initial_project(tmp_path: Path): @@ -74,14 +91,8 @@ def test_example_bot_training_on_initial_project(tmp_path: Path): str(tmp_path / "data"), ) - with pytest.warns(UserWarning) as record: + with pytest.warns(None) as record: importer.get_nlu_data() importer.get_stories() - # two for slot auto-fill removal - assert len(record) == 2 - assert ( - "Slot auto-fill has been removed in 3.0 and replaced with " - "a new explicit mechanism to set slots." in record[0].message.args[0] - ) - assert record[0].message.args[0] == record[1].message.args[0] + assert len(record) == 0 diff --git a/tests/graph_components/validators/test_default_recipe_validator.py b/tests/graph_components/validators/test_default_recipe_validator.py index 669319c19d2a..c4d36469f44e 100644 --- a/tests/graph_components/validators/test_default_recipe_validator.py +++ b/tests/graph_components/validators/test_default_recipe_validator.py @@ -1026,16 +1026,9 @@ def test_no_warnings_with_default_project(tmp_path: Path): ) validator = DefaultV1RecipeValidator(graph_config.train_schema) - with pytest.warns( - UserWarning, match="Slot auto-fill has been removed in 3.0" - ) as records: + with pytest.warns(None) as records: validator.validate(importer) - assert all( - [ - warn.message.args[0].startswith("Slot auto-fill has been removed") - for warn in records.list - ] - ) + assert len(records) == 0 def test_importer_with_invalid_model_config(tmp_path: Path): diff --git a/tests/nlu/classifiers/test_regex_message_handler.py b/tests/nlu/classifiers/test_regex_message_handler.py index fd07337f30a8..ce685b429eb7 100644 --- a/tests/nlu/classifiers/test_regex_message_handler.py +++ b/tests/nlu/classifiers/test_regex_message_handler.py @@ -59,6 +59,7 @@ def test_process_does_not_do_anything( responses={}, action_names=[], forms={}, + data={}, ) parsed_messages = regex_message_handler.process([message], domain) diff --git a/tests/nlu/extractors/test_extractor.py b/tests/nlu/extractors/test_extractor.py index edbba25ec3b3..d128f761064a 100644 --- a/tests/nlu/extractors/test_extractor.py +++ b/tests/nlu/extractors/test_extractor.py @@ -1,6 +1,7 @@ from typing import Any, Text, Dict, List import pytest +from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION from rasa.shared.nlu.constants import TEXT, SPLIT_ENTITIES_BY_COMMA from rasa.shared.nlu.training_data.message import Message @@ -419,7 +420,7 @@ def test_split_entities_by_comma( "text, warnings", [ ( - 'version: "3.0"\n' + f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' "nlu:\n" "- intent: test\n" " examples: |\n" @@ -427,7 +428,7 @@ def test_split_entities_by_comma( 1, ), ( - 'version: "3.0"\n' + f'version: 
"{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' "nlu:\n" "- intent: test\n" " examples: |\n" @@ -435,7 +436,7 @@ def test_split_entities_by_comma( 1, ), ( - 'version: "3.0"\n' + f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' "nlu:\n" "- intent: test\n" " examples: |\n" @@ -444,7 +445,7 @@ def test_split_entities_by_comma( 1, ), ( - 'version: "3.0"\n' + f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' "nlu:\n" "- intent: test\n" " examples: |\n" @@ -453,7 +454,7 @@ def test_split_entities_by_comma( 1, ), ( - 'version: "3.0"\n' + f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n' "nlu:\n" "- intent: test\n" " examples: |\n" diff --git a/tests/shared/core/test_domain.py b/tests/shared/core/test_domain.py index fb8f8777869b..c62a974c80da 100644 --- a/tests/shared/core/test_domain.py +++ b/tests/shared/core/test_domain.py @@ -178,8 +178,8 @@ def test_domain_from_template(domain: Domain): def test_avoid_action_repetition(domain: Domain): domain = Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" actions: - utter_greet responses: @@ -323,12 +323,11 @@ def test_domain_to_dict(): domain_as_dict = Domain.from_yaml(test_yaml).as_dict() assert domain_as_dict == { + "version": LATEST_TRAINING_DATA_FORMAT_VERSION, "actions": ["action_save_world"], "config": {"store_entities_as_slots": True}, KEY_E2E_ACTIONS: ["Hello, dear user", "what's up"], - "entities": [], "forms": {"some_form": {"required_slots": []}}, - "intents": [], "responses": {"utter_greet": [{"text": "hey there!"}]}, "session_config": { "carry_over_slots_to_new_session": True, @@ -337,10 +336,8 @@ def test_domain_to_dict(): "slots": { "some_slot": { "values": ["high", "low"], - "influence_conversation": True, - "initial_value": None, "mappings": [{"type": "from_text"}], - "type": "rasa.shared.core.slots.CategoricalSlot", + "type": "categorical", } }, } @@ -348,7 +345,7 @@ def test_domain_to_dict(): def test_domain_to_yaml(): test_yaml = f""" -version: '3.0' +version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}' actions: - action_save_world config: @@ -366,18 +363,25 @@ def test_domain_to_yaml(): slots: {{}} """ - with pytest.warns(UserWarning) as record: - domain = Domain.from_yaml(test_yaml) - actual_yaml = domain.as_yaml() + domain = Domain.from_yaml(test_yaml) + actual_yaml = domain.as_yaml() - assert ( - "Slot auto-fill has been removed in 3.0" - " and replaced with a new explicit mechanism to set slots. " - in record[0].message.args[0] - ) + expected_yaml = f""" +version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}' +actions: +- action_save_world +config: + store_entities_as_slots: true +responses: + utter_greet: + - text: hey there! 
+session_config: + carry_over_slots_to_new_session: true + session_expiration_time: {DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES} +""" - expected = rasa.shared.utils.io.read_yaml(test_yaml) actual = rasa.shared.utils.io.read_yaml(actual_yaml) + expected = rasa.shared.utils.io.read_yaml(expected_yaml) assert actual == expected @@ -417,7 +421,9 @@ def test_merge_yaml_domains(): domain_1 = Domain.from_yaml(test_yaml_1) domain_2 = Domain.from_yaml(test_yaml_2) + domain = domain_1.merge(domain_2) + # single attribute should be taken from domain_1 assert domain.store_entities_as_slots # conflicts should be taken from domain_1 @@ -456,6 +462,7 @@ def test_merge_yaml_domains_with_default_intents(default_intent: Text): domain_1 = Domain.from_yaml(test_yaml_1) domain_2 = Domain.from_yaml(test_yaml_2) + domain = domain_1.merge(domain_2) # check that the default intents were merged correctly @@ -463,11 +470,7 @@ def test_merge_yaml_domains_with_default_intents(default_intent: Text): assert domain.intents == sorted(["greet", *DEFAULT_INTENTS]) # ensure that the default intent is contain the domain's dictionary dump - domain_intents = [] - for intent in domain.as_dict()["intents"]: - domain_intents.append(list(intent)[0]) - - assert default_intent in domain_intents + assert default_intent in domain.as_dict()[KEY_INTENTS] def test_merge_session_config_if_first_is_not_default(): @@ -494,8 +497,8 @@ def test_merge_session_config_if_first_is_not_default(): def test_merge_with_empty_domain(): domain = Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" config: store_entities_as_slots: false session_config: @@ -517,17 +520,16 @@ def test_merge_with_empty_domain(): - text: hey you! """ ) - - merged = Domain.empty().merge(domain) - + empty_domain = Domain.empty() + merged = empty_domain.merge(domain, override=True) assert merged.as_dict() == domain.as_dict() @pytest.mark.parametrize("other", [Domain.empty(), None]) def test_merge_with_empty_other_domain(other: Optional[Domain]): domain = Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" config: store_entities_as_slots: false session_config: @@ -585,6 +587,7 @@ def test_merge_domain_with_forms(): domain_1 = Domain.from_yaml(test_yaml_1) domain_2 = Domain.from_yaml(test_yaml_2) + domain = domain_1.merge(domain_2) expected_number_of_forms = 3 @@ -968,6 +971,7 @@ def test_check_domain_sanity_on_invalid_domain(): responses={}, action_names=["random_name", "random_name"], forms={}, + data={}, ) with pytest.raises(InvalidDomain): @@ -981,6 +985,7 @@ def test_check_domain_sanity_on_invalid_domain(): responses={}, action_names=[], forms={}, + data={}, ) with pytest.raises(InvalidDomain): @@ -991,6 +996,7 @@ def test_check_domain_sanity_on_invalid_domain(): responses={}, action_names=[], forms={}, + data={}, ) @@ -1053,17 +1059,17 @@ def test_is_empty(): assert Domain.empty().is_empty() -def test_transform_intents_for_file_default(): +def test_load_intents_from_as_dict_representation(): domain_path = "data/test_domains/default_unfeaturized_entities.yml" domain = Domain.load(domain_path) - transformed = domain._transform_intents_for_file() + transformed = domain.as_dict().get(KEY_INTENTS) expected = [ {"ask": {USE_ENTITIES_KEY: True}}, {"default": {IGNORE_ENTITIES_KEY: ["unrelated_recognized_entity"]}}, {"goodbye": {USE_ENTITIES_KEY: []}}, {"greet": {USE_ENTITIES_KEY: ["name"]}}, - {"pure_intent": {USE_ENTITIES_KEY: True}}, + "pure_intent", {"thank": {USE_ENTITIES_KEY: []}}, 
{"why": {USE_ENTITIES_KEY: []}}, ] @@ -1071,19 +1077,19 @@ def test_transform_intents_for_file_default(): assert transformed == expected -def test_transform_intents_for_files_with_entities(): +def test_load_intents_with_entities_from_as_dict(): domain_path = "data/test_domains/test_domain_from_directory_for_entities" domain = Domain.load(domain_path) - transformed = domain._transform_intents_for_file() + transformed = domain.as_dict().get(KEY_INTENTS) expected = [ {"certify": {USE_ENTITIES_KEY: True}}, {"play": {USE_ENTITIES_KEY: ["ball", "chess"]}}, - {"question": {USE_ENTITIES_KEY: True}}, + "question", {"stow_away": {USE_ENTITIES_KEY: True}}, { "support_encouraging": { - USE_ENTITIES_KEY: ["anti_freeze_blankets", "automatic_cupcakes"] + USE_ENTITIES_KEY: ["automatic_cupcakes", "anti_freeze_blankets"] } }, {"vacationing": {"ignore_entities": ["tornadoes"]}}, @@ -1092,72 +1098,43 @@ def test_transform_intents_for_files_with_entities(): assert transformed == expected -def test_transform_intents_for_file_with_mapping(): +def test_load_intents_for_file_from_as_dict(): domain_path = "data/test_domains/default_with_mapping.yml" domain = Domain.load(domain_path) - transformed = domain._transform_intents_for_file() + transformed = domain.as_dict().get(KEY_INTENTS) expected = [ - {"default": {"triggers": "utter_default", USE_ENTITIES_KEY: True}}, - {"goodbye": {USE_ENTITIES_KEY: True}}, - {"greet": {"triggers": "utter_greet", USE_ENTITIES_KEY: True}}, + {"default": {"triggers": "utter_default"}}, + "goodbye", + {"greet": {"triggers": "utter_greet"}}, ] assert transformed == expected -def test_transform_intents_for_file_with_entity_roles_groups(): +def test_load_intents_with_entity_roles_groups_from_as_dict(): domain_path = "data/test_domains/travel_form.yml" domain = Domain.load(domain_path) - transformed = domain._transform_intents_for_file() + transformed = domain.as_dict().get(KEY_INTENTS) expected = [ - {"greet": {USE_ENTITIES_KEY: ["name"]}}, + {"greet": {IGNORE_ENTITIES_KEY: ["GPE"]}}, {"inform": {USE_ENTITIES_KEY: ["GPE"]}}, ] assert transformed == expected -def test_transform_entities_for_file_default(): +def test_load_entities_from_as_dict(): domain_path = "data/test_domains/travel_form.yml" domain = Domain.load(domain_path) - transformed = domain._transform_entities_for_file() + transformed = domain.as_dict().get(KEY_ENTITIES) expected = [{"GPE": {ENTITY_ROLES_KEY: ["destination", "origin"]}}, "name"] assert transformed == expected -def test_clean_domain_for_file(): - domain_path = "data/test_domains/default_unfeaturized_entities.yml" - cleaned = Domain.load(domain_path).cleaned_domain() - - expected = { - "entities": ["name", "unrelated_recognized_entity", "other"], - "intents": [ - "ask", - {"default": {IGNORE_ENTITIES_KEY: ["unrelated_recognized_entity"]}}, - {"goodbye": {USE_ENTITIES_KEY: []}}, - {"greet": {USE_ENTITIES_KEY: ["name"]}}, - "pure_intent", - {"thank": {USE_ENTITIES_KEY: []}}, - {"why": {USE_ENTITIES_KEY: []}}, - ], - "responses": { - "utter_default": [{"text": "default message"}], - "utter_goodbye": [{"text": "goodbye :("}], - "utter_greet": [{"text": "hey there!"}], - }, - "session_config": { - "carry_over_slots_to_new_session": True, - "session_expiration_time": DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, - }, - } - - assert cleaned == expected - - def test_not_add_knowledge_base_slots(): test_domain = Domain.empty() @@ -1171,7 +1148,7 @@ def test_not_add_knowledge_base_slots(): def test_add_knowledge_base_slots(): test_domain = Domain.from_yaml( f""" - version: 
"3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" actions: - {DEFAULT_KNOWLEDGE_BASE_ACTION} """ @@ -1226,8 +1203,7 @@ def test_session_config( def test_domain_as_dict_with_session_config(): session_config = SessionConfig(123, False) - domain = Domain.empty() - domain.session_config = session_config + domain = Domain([], [], [], {}, [], {}, {}, None, True, session_config) serialized = domain.as_dict() deserialized = Domain.from_dict(serialized) @@ -1488,8 +1464,8 @@ def test_form_invalid_mappings(domain_as_dict: Dict[Text, Any]): def test_form_invalid_required_slots_raises(): with pytest.raises(YamlValidationException): Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" entities: - some_entity forms: @@ -1557,23 +1533,23 @@ def test_slot_invalid_mappings(domain_as_dict: Dict[Text, Any]): [ # Wrong type for slots ( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" slots: [] """ ), # Wrong type for slot names ( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" slots: some_slot: 5 """ ), ( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" slots: some_slot: [] """ @@ -1639,7 +1615,7 @@ def test_slot_order_is_preserved(): """ domain = Domain.from_yaml(test_yaml) - assert domain.as_yaml(clean_before_dump=True) == test_yaml + assert domain.as_yaml() == test_yaml def test_slot_order_is_preserved_when_merging(): @@ -1675,18 +1651,18 @@ def test_slot_order_is_preserved_when_merging(): slots:{slot_2} """ - test_yaml_merged = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}' + test_yaml_merged = f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" +slots:{slot_2}{slot_1} session_config: session_expiration_time: 60 carry_over_slots_to_new_session: true -slots:{slot_2}{slot_1} """ domain_1 = Domain.from_yaml(test_yaml_1) domain_2 = Domain.from_yaml(test_yaml_2) domain_merged = domain_1.merge(domain_2) - assert domain_merged.as_yaml(clean_before_dump=True) == test_yaml_merged + assert domain_merged.as_yaml() == test_yaml_merged def test_responses_text_multiline_is_preserved(): @@ -1707,7 +1683,7 @@ def test_responses_text_multiline_is_preserved(): """ domain = Domain.from_yaml(test_yaml) - assert domain.as_yaml(clean_before_dump=True) == test_yaml + assert domain.as_yaml() == test_yaml def test_is_valid_domain_doesnt_raise_with_valid_domain(tmpdir: Path): @@ -1792,8 +1768,8 @@ def test_domain_count_conditional_response_variations(): def test_domain_with_no_form_slots(): domain = Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" forms: contract_form: required_slots: [] @@ -1805,8 +1781,8 @@ def test_domain_with_no_form_slots(): def test_domain_with_empty_required_slots(): with pytest.raises(YamlException): Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" forms: contract_form: """ @@ -1821,68 +1797,26 @@ def test_domain_invalid_yml_in_folder(): Domain.from_directory("data/test_domains/test_domain_from_directory/") -def test_domain_with_duplicates(): +def test_invalid_domain_dir_with_duplicates(): """ - Check if a domain with duplicated slots, responses and intents in domain files - removes the duplications in the domain. + Raises InvalidDomain if a domain is loaded from a directory with duplicated slots, + responses and intents in domain files. 
""" - domain = Domain.from_directory("data/test_domains/test_domain_with_duplicates/") - expected_intents = [ - "affirm", - "back", - "bot_challenge", - "deny", - "goodbye", - "greet", - "mood_great", - "mood_unhappy", - "nlu_fallback", - "out_of_scope", - "restart", - "session_start", - "test", - ] - expected_responses = { - "utter_greet": [{"text": "Hey! How are you?"}], - "utter_did_that_help": [{"text": "Did that help you?"}], - "utter_happy": [{"text": "Great, carry on!"}], - "utter_cheer_up": [ - { - "text": "Here is something to cheer you up:", - "image": "https://i.imgur.com/nGF1K8f.jpg", - } - ], - "utter_goodbye": [{"text": "Bye"}], - "utter_iamabot": [{"text": "I am a bot, powered by Rasa."}], - } - assert domain.intents == expected_intents - assert domain.responses == expected_responses - assert domain.duplicates["slots"] == ["mood"] - assert domain.duplicates["responses"] == ["utter_did_that_help", "utter_greet"] - assert domain.duplicates["intents"] == ["greet"] - - -def test_domain_without_duplicates(): - """ - Check if a domain without duplicated slots, responses and intents contains - nothing in `duplicates` field. - """ - domain = Domain.from_directory("data/test_domains/test_domain_without_duplicates/") - assert domain.duplicates == {} - - -def test_domain_duplicates_when_one_domain_file(): - """ - Check if a domain with duplicated slots, responses and intents contains - a correct information in `duplicates` field. - """ - domain = Domain.from_file(path="data/test_domains/default.yml") - assert domain.duplicates is None + with pytest.warns(UserWarning) as warning: + Domain.from_directory("data/test_domains/test_domain_with_duplicates/") + + error_message = ( + "The following duplicated intents have been found across multiple domain files: greet \n" + "The following duplicated responses have been found across multiple domain files: " + "utter_did_that_help, utter_greet \n" + "The following duplicated slots have been found across multiple domain files: mood" + ) + assert error_message == warning[2].message.args[0] def test_domain_fingerprint_consistency_across_runs(): - domain_yaml = """ - version: "3.0" + domain_yaml = f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - greet - goodbye @@ -1914,8 +1848,8 @@ def test_domain_fingerprint_consistency_across_runs(): def test_domain_fingerprint_uniqueness(): domain = Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - greet - goodbye @@ -1926,8 +1860,8 @@ def test_domain_fingerprint_uniqueness(): f1 = domain.fingerprint() domain_with_extra_intent = Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - greet - goodbye @@ -1940,8 +1874,8 @@ def test_domain_fingerprint_uniqueness(): assert f1 != f2 domain_with_extra_action = Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - greet - goodbye @@ -1954,8 +1888,8 @@ def test_domain_fingerprint_uniqueness(): assert f1 != f3 domain_with_extra_responses = Domain.from_yaml( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - greet - goodbye @@ -1973,8 +1907,8 @@ def test_domain_fingerprint_uniqueness(): def test_domain_slots_for_entities_with_mapping_conditions_no_slot_set(): domain = Domain.from_yaml( textwrap.dedent( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" entities: - city slots: @@ -2000,8 +1934,8 @@ def 
test_domain_slots_for_entities_with_mapping_conditions_no_slot_set():
 def test_domain_slots_for_entities_sets_valid_slot():
     domain = Domain.from_yaml(
         textwrap.dedent(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             entities:
             - city
             slots:
@@ -2021,8 +1955,8 @@ def test_domain_slots_for_entities_sets_valid_list_slot():
     domain = Domain.from_yaml(
         textwrap.dedent(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             entities:
             - topping
             slots:
@@ -2046,8 +1980,8 @@ def test_domain_slots_for_entities_with_entity_mapping_to_multiple_slots():
     domain = Domain.from_yaml(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         entities:
         - city
         slots:
diff --git a/tests/shared/core/test_slot_mappings.py b/tests/shared/core/test_slot_mappings.py
index 6ecd3d4db1cd..4b344e9524de 100644
--- a/tests/shared/core/test_slot_mappings.py
+++ b/tests/shared/core/test_slot_mappings.py
@@ -1,6 +1,7 @@
 from typing import Text
 
 import pytest
+from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
 from rasa.shared.core.domain import Domain
 from rasa.shared.core.events import UserUttered, ActiveLoop
@@ -68,7 +69,7 @@ def test_slot_mapping_intent_is_desired(domain: Domain):
 def test_slot_mappings_ignored_intents_during_active_loop():
     domain = Domain.from_yaml(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         intents:
         - greet
         - chitchat
@@ -104,8 +105,8 @@ def test_missing_slot_mappings_raises():
     with pytest.raises(YamlValidationException):
         Domain.from_yaml(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             slots:
               some_slot:
                 type: text
@@ -117,8 +118,8 @@ def test_slot_mappings_invalid_type_raises():
     with pytest.raises(YamlValidationException):
         Domain.from_yaml(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             entities:
             - from_entity
             slots:
diff --git a/tests/shared/core/test_trackers.py b/tests/shared/core/test_trackers.py
index 7562bd7e9a7b..905756dd5f02 100644
--- a/tests/shared/core/test_trackers.py
+++ b/tests/shared/core/test_trackers.py
@@ -25,7 +25,7 @@
     REQUESTED_SLOT,
     LOOP_INTERRUPTED,
 )
-from rasa.shared.constants import DEFAULT_SENDER_ID
+from rasa.shared.constants import DEFAULT_SENDER_ID, LATEST_TRAINING_DATA_FORMAT_VERSION
 from rasa.core.agent import Agent
 from rasa.shared.core.domain import Domain
 from rasa.shared.core.events import (
@@ -1378,7 +1378,7 @@ async def test_fill_slots_for_policy_entities():
     domain = Domain.from_yaml(
         textwrap.dedent(
             f"""
-            version: "3.0"
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             entities:
             - {nlu_entity}
             - {policy_entity}
diff --git a/tests/shared/core/training_data/story_reader/test_yaml_story_reader.py b/tests/shared/core/training_data/story_reader/test_yaml_story_reader.py
index 44a173cb31cf..51521fc4a263 100644
--- a/tests/shared/core/training_data/story_reader/test_yaml_story_reader.py
+++ b/tests/shared/core/training_data/story_reader/test_yaml_story_reader.py
@@ -977,6 +977,7 @@ def test_process_unpacks_attributes_from_single_message_and_fallsback_if_needed(
         responses={},
         action_names=[],
         forms={},
+        data={},
     )
 
     # extract information
@@ -1068,6 +1069,7 @@ def test_process_warns_if_intent_or_entities_not_in_domain(
         responses={},
         action_names=[],
         forms={},
+        data={},
     )
 
     # expect a warning
@@ -1097,6 +1099,7 @@ async def test_unpack_regex_message_has_correct_entity_start_and_end():
         responses={},
         action_names=[],
         forms={},
+        data={},
     )
 
     message = YAMLStoryReader.unpack_regex_message(
diff --git a/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py b/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py
index 4c6f44ccd835..9f5cda7a0ab4 100644
--- a/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py
+++ b/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py
@@ -3,6 +3,7 @@ from typing import Text
 from collections import OrderedDict
 
 import pytest
+from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
 
 from rasa.shared.core.constants import (
     ACTION_SESSION_START_NAME,
@@ -87,8 +88,8 @@ def test_yaml_writer_dumps_user_messages():
     assert (
         dump.strip()
         == textwrap.dedent(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             stories:
             - story: default
               steps:
@@ -115,8 +116,8 @@ def test_yaml_writer_doesnt_dump_action_unlikely_intent():
     assert (
         dump.strip()
         == textwrap.dedent(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             stories:
             - story: default
               steps:
@@ -139,8 +140,8 @@ def test_yaml_writer_avoids_dumping_not_existing_user_messages():
     assert (
         dump.strip()
         == textwrap.dedent(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             stories:
             - story: default
               steps:
@@ -198,7 +199,7 @@ def test_yaml_writer_stories_to_yaml_with_null_entities(domain: Domain):
     writer = YAMLStoryWriter()
     stories = textwrap.dedent(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         stories:
         - story: happy path
           steps:
@@ -251,7 +252,7 @@ def test_writing_end_to_end_stories(domain: Domain):
         dump.strip()
         == textwrap.dedent(
             f"""
-            version: "3.0"
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             stories:
             - story: {story_name}
               steps:
@@ -298,7 +299,7 @@ def test_reading_and_writing_end_to_end_stories_in_test_mode(domain: Domain):
         dump.strip()
         == textwrap.dedent(
             f"""
-            version: "3.0"
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             stories:
             - story: {story_name}
               steps:
diff --git a/tests/shared/core/training_data/test_structures.py b/tests/shared/core/training_data/test_structures.py
index e17f5d413577..f42dcfe316ea 100644
--- a/tests/shared/core/training_data/test_structures.py
+++ b/tests/shared/core/training_data/test_structures.py
@@ -1,4 +1,5 @@
 import rasa.core
+from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
 from rasa.shared.core.constants import ACTION_SESSION_START_NAME
 from rasa.shared.core.domain import Domain
 from rasa.shared.core.events import (
@@ -41,7 +42,7 @@ def test_session_start_is_not_serialised(domain: Domain):
         Story.from_events(tracker.events, "some-story01").story_steps
     )
 
-    expected = """version: "3.0"
+    expected = f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 stories:
 - story: some-story01
   steps:
diff --git a/tests/shared/utils/test_common.py b/tests/shared/utils/test_common.py
index 323a431bf17a..4753300677bf 100644
--- a/tests/shared/utils/test_common.py
+++ b/tests/shared/utils/test_common.py
@@ -179,3 +179,87 @@ def test_class_from_module_path_fails():
     module_path = "rasa.shared.core.domain.logger"
     with pytest.raises(RasaException):
         rasa.shared.utils.common.class_from_module_path(module_path)
+
+
+def test_extract_duplicates():
+    list_one = ["greet", {"inform": {"use_entities": []}}, "start_form", "goodbye"]
+    list_two =
["goodbye", {"inform": {"use_entities": ["destination"]}}] + + expected = ["goodbye", "inform"] + result = rasa.shared.utils.common.extract_duplicates(list_one, list_two) + + assert result == expected + + +def test_extract_duplicates_with_unique_lists(): + list_one = ["greet", {"inform": {"use_entities": []}}, "start_form", "goodbye"] + list_two = ["bot_challenge", {"mood_sad": {"ignore_entities": []}}] + + result = rasa.shared.utils.common.extract_duplicates(list_one, list_two) + assert result == [] + + +def test_clean_duplicates(): + duplicates = {"intents": ["goodbye", "inform"], "entities": []} + expected = {"intents": ["goodbye", "inform"]} + result = rasa.shared.utils.common.clean_duplicates(duplicates) + assert result == expected + + +def test_merge_lists(): + list_one = ["greet", "start_form", "goodbye"] + list_two = ["goodbye", "bot_challenge", "greet"] + expected = ["bot_challenge", "goodbye", "greet", "start_form"] + result = rasa.shared.utils.common.merge_lists(list_one, list_two) + + assert result == expected + + +@pytest.mark.parametrize("override_existing_values", [False, True]) +def test_merge_dicts(override_existing_values): + dict_1 = {"intents": ["greet", "goodbye"], "entities": ["name"]} + dict_2 = { + "responses": {"utter_greet": [{"text": "Hi"}]}, + "intents": ["bot_challenge"], + } + + if override_existing_values: + expected = { + "entities": ["name"], + "intents": ["bot_challenge"], + "responses": {"utter_greet": [{"text": "Hi"}]}, + } + else: + expected = { + "entities": ["name"], + "intents": ["greet", "goodbye"], + "responses": {"utter_greet": [{"text": "Hi"}]}, + } + + result = rasa.shared.utils.common.merge_dicts( + dict_1, dict_2, override_existing_values + ) + + assert result == expected + + +@pytest.mark.parametrize("override_existing_values", [False, True]) +def test_merge_lists_of_dicts(override_existing_values): + list_one = ["greet", {"inform": {"use_entities": []}}, "start_form", "goodbye"] + list_two = ["goodbye", {"inform": {"use_entities": ["destination"]}}] + + if override_existing_values: + expected = [ + "greet", + {"inform": {"use_entities": ["destination"]}}, + "start_form", + "goodbye", + ] + else: + expected = ["goodbye", {"inform": {"use_entities": []}}, "greet", "start_form"] + + result = rasa.shared.utils.common.merge_lists_of_dicts( + list_one, list_two, override_existing_values + ) + + assert result == expected diff --git a/tests/shared/utils/test_validation.py b/tests/shared/utils/test_validation.py index a13b2acf7f3e..e32d55e364a6 100644 --- a/tests/shared/utils/test_validation.py +++ b/tests/shared/utils/test_validation.py @@ -50,8 +50,8 @@ def test_validate_yaml_schema_raise_exception(file: Text, schema: Text): def test_validate_yaml_schema_raise_exception_null_text(): - domain = """ - version: "3.0" + domain = f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" responses: utter_ask_email: - text: What is your email ID? 
diff --git a/tests/test_model_testing.py b/tests/test_model_testing.py
index d030355372a9..a0e42bb08ac8 100644
--- a/tests/test_model_testing.py
+++ b/tests/test_model_testing.py
@@ -32,6 +32,7 @@
     ENTITY_ATTRIBUTE_TYPE,
     ENTITY_ATTRIBUTE_TEXT,
 )
+from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
 
 
 def monkeypatch_get_latest_model(tmp_path: Path, monkeypatch: MonkeyPatch) -> None:
@@ -382,8 +383,8 @@ def test_write_classification_errors():
     assert (
         dump.strip()
         == textwrap.dedent(
-            """
-            version: "3.0"
+            f"""
+            version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
             stories:
             - story: default
               steps:
diff --git a/tests/test_model_training.py b/tests/test_model_training.py
index 7f758279170b..ad61a4dbd59c 100644
--- a/tests/test_model_training.py
+++ b/tests/test_model_training.py
@@ -33,6 +33,7 @@
 
 from rasa.nlu.classifiers.diet_classifier import DIETClassifier
+from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
 import rasa.shared.utils.io
 from rasa.shared.core.domain import Domain
 from rasa.shared.exceptions import InvalidConfigException
@@ -910,8 +911,9 @@ def test_models_not_retrained_if_only_new_responses(
       utter_greet:
       - text: "Hi from Rasa"
     """
+    domain_with_extra_response = Domain.from_yaml(domain_with_extra_response)
 
-    new_domain = domain.merge(Domain.from_yaml(domain_with_extra_response))
+    new_domain = domain.merge(domain_with_extra_response)
 
     new_domain_path = tmp_path / "domain.yml"
     rasa.shared.utils.io.write_yaml(new_domain.as_dict(), new_domain_path)
@@ -963,7 +965,7 @@ def test_invalid_graph_schema(
 ):
     config = textwrap.dedent(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 
         recipe: "default.v1"
         pipeline:
@@ -1003,7 +1005,7 @@ def test_fingerprint_changes_if_module_changes(
 
     config = textwrap.dedent(
         f"""
-        version: "3.0"
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 
         recipe: "default.v1"
         policies:
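Reviewer note: nearly every file touched here now repeats the same `f"""` plus `version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"` preamble. A small helper could cut that repetition in a follow-up; a sketch only (the `yaml_with_version` name is hypothetical and not part of this patch):

    import textwrap

    from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION


    def yaml_with_version(body: str) -> str:
        """Prefix a dedented YAML snippet with the current format version."""
        return (
            f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"\n'
            + textwrap.dedent(body)
        )

Call sites would then read `Domain.from_yaml(yaml_with_version(...))` instead of embedding the version line in each literal.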
diff --git a/tests/test_server.py b/tests/test_server.py
index f4e658096d96..8a2966d7ee2f 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -70,6 +70,7 @@
     ENTITY_ATTRIBUTE_VALUE,
     PREDICTED_CONFIDENCE_KEY,
 )
+from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
 from rasa.model_training import TrainingResult
 from rasa.utils.endpoints import EndpointConfig
 from tests.conftest import AsyncMock, with_model_id, with_model_ids
@@ -576,8 +577,8 @@ def assert_trained_model(
 async def test_train_with_yaml(
     rasa_app: SanicASGITestClient, tmp_path_factory: TempPathFactory
 ):
-    training_data = """
-version: "3.0"
+    training_data = f"""
+version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 
 stories:
 - story: My story
@@ -1527,7 +1528,7 @@ async def test_unload_model_error(rasa_app: SanicASGITestClient):
     assert response.status == HTTPStatus.NO_CONTENT
 
 
-async def test_get_domain(rasa_app: SanicASGITestClient):
+async def test_get_domain(rasa_app: SanicASGITestClient, domain_path: Text):
     _, response = await rasa_app.get(
         "/domain", headers={"accept": rasa.server.JSON_CONTENT_TYPE}
     )
@@ -1535,12 +1536,10 @@ async def test_get_domain(rasa_app: SanicASGITestClient):
     content = response.json
 
     assert response.status == HTTPStatus.OK
-    assert "config" in content
-    assert "intents" in content
-    assert "entities" in content
-    assert "slots" in content
-    assert "responses" in content
-    assert "actions" in content
+    # assert only keys in `domain_path` fixture
+    original_domain_dict = Domain.load(domain_path).as_dict()
+    for key in original_domain_dict.keys():
+        assert key in content
 
 
 async def test_get_domain_invalid_accept_header(rasa_app: SanicASGITestClient):
@@ -1811,7 +1810,7 @@ class NoInputChannels:
             ],
             None,
             True,
-            """version: "3.0"
+            f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 stories:
 - story: some-conversation-ID
   steps:
@@ -1834,7 +1833,7 @@ class NoInputChannels:
             ],
             None,
             True,
-            """version: "3.0"
+            f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 stories:
 - story: some-conversation-ID, story 1
   steps:
@@ -1864,7 +1863,7 @@ class NoInputChannels:
             ],
             None,
             False,
-            """version: "3.0"
+            f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 stories:
 - story: some-conversation-ID
   steps:
@@ -1888,7 +1887,7 @@ class NoInputChannels:
             ],
             None,
             None,
-            """version: "3.0"
+            f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 stories:
 - story: some-conversation-ID
   steps:
@@ -1911,7 +1910,7 @@ class NoInputChannels:
             ],
             4,
             True,
-            """version: "3.0"
+            f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 stories:
 - story: some-conversation-ID
   steps:
@@ -1921,7 +1920,7 @@ class NoInputChannels:
             - action: utter_greet""",
         ),
         # empty conversation
-        ([], None, True, 'version: "3.0"'),
+        ([], None, True, f'version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"'),
         # Conversation with slot
         (
             [
@@ -1933,7 +1932,7 @@ class NoInputChannels:
             ],
             None,
             True,
-            """version: "3.0"
+            f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 stories:
 - story: some-conversation-ID
   steps:
@@ -2037,7 +2036,7 @@ async def test_get_story_does_not_update_conversation_session(
     # expected story is returned
     assert (
        response.content.decode().strip()
-        == """version: "3.0"
+        == f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
 stories:
 - story: some-conversation-ID
   steps:
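Reviewer note: since `dict.keys()` views support subset comparison, the loop in the `test_get_domain` hunk above could be collapsed to a single assertion with the same behavior (a sketch):

    # every key of the original domain must appear in the /domain response
    assert original_domain_dict.keys() <= content.keys()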
diff --git a/tests/test_validator.py b/tests/test_validator.py
index adc41604989c..6efae558467a 100644
--- a/tests/test_validator.py
+++ b/tests/test_validator.py
@@ -1,12 +1,12 @@
-from typing import Text, Any, Optional, List, Dict
+from typing import Text
 
 import pytest
 from _pytest.logging import LogCaptureFixture
+from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
 from rasa.validator import Validator
 
 from rasa.shared.importers.rasa import RasaFileImporter
-from rasa.shared.core.domain import Domain
 from pathlib import Path
@@ -111,8 +111,8 @@ def test_verify_bad_story_structure():
 def test_verify_bad_e2e_story_structure_when_text_identical(tmp_path: Path):
     story_file_name = tmp_path / "stories.yml"
     story_file_name.write_text(
-        """
-        version: "3.0"
+        f"""
+        version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
         stories:
         - story: path 1
           steps:
@@ -283,19 +283,28 @@ def test_early_exit_on_invalid_domain():
     validator = Validator.from_importer(importer)
     validator.verify_domain_validity()
 
-    # two for non-unique domains, two for auto-fill removal
+    # 2 for non-unique domains, 2 for auto-fill removal
     assert len(record) == 4
-    assert any(
-        [
-            f"Loading domain from '{domain_path}' failed. Using empty domain. "
-            "Error: 'Intents are not unique! Found multiple intents with name(s) "
-            "['default', 'goodbye']. Either rename or remove the duplicate ones.'"
-            in warning.message.args[0]
-            for warning in record
-        ]
+
+    non_unique_warnings = list(
+        filter(
+            lambda warning: f"Loading domain from '{domain_path}' failed. "
+            f"Using empty domain. Error: 'Intents are not unique! "
+            f"Found multiple intents with name(s) ['default', 'goodbye']. "
+            f"Either rename or remove the duplicate ones.'" in warning.message.args[0],
+            record,
+        )
     )
-    assert record[0].message.args[0] == record[2].message.args[0]
-    assert record[1].message.args[0] == record[3].message.args[0]
+    assert len(non_unique_warnings) == 2
+
+    auto_fill_warnings = list(
+        filter(
+            lambda warning: "Slot auto-fill has been removed in 3.0"
+            in warning.message.args[0],
+            record,
+        )
+    )
+    assert len(auto_fill_warnings) == 2
 
 
 def test_verify_there_is_not_example_repetition_in_intents():
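Reviewer note: the two `filter(...)` plus `len(...)` blocks above could equally be written as list comprehensions keyed on the distinctive substrings, which some find easier to scan (a sketch with the same match strings):

    non_unique_warnings = [
        w for w in record if "Intents are not unique!" in w.message.args[0]
    ]
    auto_fill_warnings = [
        w
        for w in record
        if "Slot auto-fill has been removed in 3.0" in w.message.args[0]
    ]
    assert len(non_unique_warnings) == len(auto_fill_warnings) == 2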
- """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" forms: name_form: required_slots: @@ -494,8 +431,8 @@ def test_valid_stories_rules_actions_in_domain( ): domain = tmp_path / "domain.yml" domain.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - greet actions: @@ -505,7 +442,7 @@ def test_valid_stories_rules_actions_in_domain( file_name = tmp_path / f"{file_name}.yml" file_name.write_text( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" {file_name}: - {data_type}: test path steps: @@ -526,8 +463,8 @@ def test_valid_stories_rules_default_actions( ): domain = tmp_path / "domain.yml" domain.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - greet """ @@ -535,7 +472,7 @@ def test_valid_stories_rules_default_actions( file_name = tmp_path / f"{file_name}.yml" file_name.write_text( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" {file_name}: - {data_type}: test path steps: @@ -551,8 +488,8 @@ def test_valid_stories_rules_default_actions( def test_valid_form_slots_in_domain(tmp_path: Path): domain = tmp_path / "domain.yml" domain.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" forms: name_form: required_slots: @@ -579,7 +516,7 @@ def test_verify_slot_mappings_mapping_active_loop_not_in_forms(tmp_path: Path): slot_name = "some_slot" domain.write_text( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" entities: - some_entity slots: @@ -615,7 +552,7 @@ def test_verify_slot_mappings_from_trigger_intent_mapping_slot_not_in_forms( slot_name = "started_booking_form" domain.write_text( f""" - version: "3.0" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - activate_booking entities: @@ -652,8 +589,8 @@ def test_verify_slot_mappings_from_trigger_intent_mapping_slot_not_in_forms( def test_verify_slot_mappings_slot_with_mapping_conditions_not_in_form(tmp_path: Path): domain = tmp_path / "domain.yml" domain.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - activate_booking entities: @@ -693,8 +630,8 @@ def test_verify_slot_mappings_slot_with_mapping_conditions_not_in_form(tmp_path: def test_verify_slot_mappings_valid(tmp_path: Path): domain = tmp_path / "domain.yml" domain.write_text( - """ - version: "3.0" + f""" + version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" intents: - activate_booking entities: