From 33959e238bf7f3eb290868514db3f59584ebb742 Mon Sep 17 00:00:00 2001
From: Mahesh <maheshsattala@gmail.com>
Date: Wed, 13 Nov 2024 16:10:06 +0530
Subject: [PATCH 1/2] Skip creating default NLU fallback data when importing
 uploaded files

---
 kairon/importer/data_importer.py |  3 ++-
 kairon/shared/data/processor.py  | 12 +++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/kairon/importer/data_importer.py b/kairon/importer/data_importer.py
index 919d8bbda..2eb544e4d 100644
--- a/kairon/importer/data_importer.py
+++ b/kairon/importer/data_importer.py
@@ -56,4 +56,5 @@ def import_data(self):
                                                           self.validator.bot_content,
                                                           self.validator.chat_client_config.get('config'),
                                                           self.validator.other_collections,
-                                                          self.overwrite, self.files_to_save)
+                                                          self.overwrite, self.files_to_save,
+                                                          file_upload=True)
diff --git a/kairon/shared/data/processor.py b/kairon/shared/data/processor.py
index c9394e123..68536dbe6 100644
--- a/kairon/shared/data/processor.py
+++ b/kairon/shared/data/processor.py
@@ -569,6 +569,7 @@ def save_training_data(
             other_collections: dict = None,
             overwrite: bool = False,
             what: set = REQUIREMENTS.copy(),
+            file_upload: bool = False
     ):
         if overwrite:
             self.delete_bot_data(bot, user, what)
@@ -585,7 +586,7 @@ def save_training_data(
         if "rules" in what:
             self.save_rules(story_graph.story_steps, bot, user)
         if "config" in what:
-            self.add_or_overwrite_config(config, bot, user)
+            self.add_or_overwrite_config(config, bot, user, file_upload)
         if "chat_client_config" in what:
             self.save_chat_client_config(chat_client_config, bot, user)
         if "multiflow_stories" in what:
@@ -2131,7 +2132,7 @@ def save_config(self, configs: dict, bot: Text, user: Text):
             logging.info(e)
             raise AppException(e)
 
-    def add_or_overwrite_config(self, configs: dict, bot: Text, user: Text):
+    def add_or_overwrite_config(self, configs: dict, bot: Text, user: Text, file_upload: bool = False):
         """
         saves bot configuration
 
@@ -2142,7 +2143,7 @@ def add_or_overwrite_config(self, configs: dict, bot: Text, user: Text):
         """
         for custom_component in Utility.environment["model"]["pipeline"]["custom"]:
             self.__insert_bot_id(configs, bot, custom_component)
-        self.add_default_fallback_config(configs, bot, user)
+        self.add_default_fallback_config(configs, bot, user, file_upload)
         try:
             config_obj = Configs.objects().get(bot=bot)
         except DoesNotExist:
@@ -5402,7 +5403,7 @@ def prepare_training_data_for_validation(
             rules = self.get_rules_for_training(bot)
             YAMLStoryWriter().dump(rules_path, rules.story_steps)
 
-    def add_default_fallback_config(self, config_obj: dict, bot: Text, user: Text):
+    def add_default_fallback_config(self, config_obj: dict, bot: Text, user: Text, file_upload: bool = False):
         idx = next(
             (
                 idx
@@ -5449,7 +5450,8 @@ def add_default_fallback_config(self, config_obj: dict, bot: Text, user: Text):
                 fallback = {"name": "FallbackClassifier", "threshold": 0.7}
                 config_obj["pipeline"].insert(property_idx + 1, fallback)
 
-        self.add_default_fallback_data(bot, user, True, True)
+        if not file_upload:
+            self.add_default_fallback_data(bot, user, True, True)
 
     def add_default_fallback_data(
             self,

From f2b215f67292b68221bdd51a1de7fc7014bec452 Mon Sep 17 00:00:00 2001
From: Mahesh <maheshsattala@gmail.com>
Date: Wed, 13 Nov 2024 22:55:31 +0530
Subject: [PATCH 2/2] Add valid_data fixtures and update tests for skipping
 default NLU fallback data on file upload

---
 .../validator/valid_data/actions.yml          | 25 +++++++++
 .../valid_data/chat_client_config.yml         | 44 +++++++++++++++
 .../validator/valid_data/config.yml           | 26 +++++++++
 .../validator/valid_data/data/nlu.yml         | 13 +++++
 .../validator/valid_data/data/rules.yml       | 25 +++++++++
 .../validator/valid_data/data/stories.yml     | 14 +++++
 .../validator/valid_data/domain.yml           | 26 +++++++++
 tests/unit_test/events/events_test.py         | 54 ++++++++++++++++---
 .../unit_test/validator/data_importer_test.py | 29 ++++++++--
 9 files changed, 245 insertions(+), 11 deletions(-)
 create mode 100644 tests/testing_data/validator/valid_data/actions.yml
 create mode 100644 tests/testing_data/validator/valid_data/chat_client_config.yml
 create mode 100644 tests/testing_data/validator/valid_data/config.yml
 create mode 100644 tests/testing_data/validator/valid_data/data/nlu.yml
 create mode 100644 tests/testing_data/validator/valid_data/data/rules.yml
 create mode 100644 tests/testing_data/validator/valid_data/data/stories.yml
 create mode 100644 tests/testing_data/validator/valid_data/domain.yml

diff --git a/tests/testing_data/validator/valid_data/actions.yml b/tests/testing_data/validator/valid_data/actions.yml
new file mode 100644
index 000000000..2cd53cbfd
--- /dev/null
+++ b/tests/testing_data/validator/valid_data/actions.yml
@@ -0,0 +1,25 @@
+http_action:
+- action_name: action_say_hello
+  content_type: json
+  headers: []
+  http_url: https://jsonplaceholder.typicode.com/posts/1
+  params_list: []
+  request_method: GET
+  response:
+    dispatch: true
+    dispatch_type: text
+    evaluation_type: expression
+    value: ${data}
+  set_slots: []
+- action_name: action_say_goodbye
+  content_type: json
+  headers: []
+  http_url: https://jsonplaceholder.typicode.com/posts/1
+  params_list: []
+  request_method: GET
+  response:
+    dispatch: true
+    dispatch_type: text
+    evaluation_type: expression
+    value: ${data}
+  set_slots: []
\ No newline at end of file
diff --git a/tests/testing_data/validator/valid_data/chat_client_config.yml b/tests/testing_data/validator/valid_data/chat_client_config.yml
new file mode 100644
index 000000000..b5851517a
--- /dev/null
+++ b/tests/testing_data/validator/valid_data/chat_client_config.yml
@@ -0,0 +1,44 @@
+config:
+  api_server_host_url: http://testserver
+  botClassName: ''
+  buttonType: button
+  chatContainerClassName: ''
+  chat_server_base_url: null
+  container: '#root'
+  containerClassName: ''
+  formClassName: ''
+  headerClassName: ''
+  name: kairon_testing
+  openButtonClassName: ''
+  styles:
+    botStyle:
+      backgroundColor: '#e0e0e0'
+      color: '#000000'
+      fontFamily: '''Roboto'', sans-serif'
+      fontSize: 14px
+      iconSrc: ''
+      showIcon: 'false'
+    buttonStyle:
+      backgroundColor: '#2b3595'
+      color: '#ffffff'
+    containerStyles:
+      background: '#ffffff'
+      height: 500px
+      width: 350px
+    headerStyle:
+      backgroundColor: '#2b3595'
+      color: '#ffffff'
+      height: 60px
+    userStyle:
+      backgroundColor: '#2b3595'
+      color: '#ffffff'
+      fontFamily: '''Roboto'', sans-serif'
+      fontSize: 14px
+      iconSrc: ''
+      showIcon: 'false'
+  userClassName: ''
+  userStorage: ls
+  userType: custom
+  welcomeMessage: Hello! How are you? This is Testing Welcome Message.
+  whitelist:
+  - '*'
diff --git a/tests/testing_data/validator/valid_data/config.yml b/tests/testing_data/validator/valid_data/config.yml
new file mode 100644
index 000000000..c7d8d5464
--- /dev/null
+++ b/tests/testing_data/validator/valid_data/config.yml
@@ -0,0 +1,26 @@
+recipe: default.v1
+language: en
+pipeline:
+- name: WhitespaceTokenizer
+- name: RegexFeaturizer
+- name: LexicalSyntacticFeaturizer
+- name: CountVectorsFeaturizer
+- analyzer: char_wb
+  max_ngram: 4
+  min_ngram: 1
+  name: CountVectorsFeaturizer
+- name: FallbackClassifier
+  threshold: 0.75
+- epochs: 5
+  name: DIETClassifier
+- name: EntitySynonymMapper
+- epochs: 5
+  name: ResponseSelector
+policies:
+- name: MemoizationPolicy
+- epochs: 5
+  max_history: 5
+  name: TEDPolicy
+- name: RulePolicy
+  core_fallback_threshold: 0.3
+  core_fallback_action_name: action_small_talk
diff --git a/tests/testing_data/validator/valid_data/data/nlu.yml b/tests/testing_data/validator/valid_data/data/nlu.yml
new file mode 100644
index 000000000..c022c7a84
--- /dev/null
+++ b/tests/testing_data/validator/valid_data/data/nlu.yml
@@ -0,0 +1,13 @@
+version: "3.1"
+nlu:
+- intent: greet
+  examples: |
+    - hey
+    - hello
+    - hi
+- intent: deny
+  examples: |
+    - no
+    - never
+    - I don't think so
+    - don't like that
diff --git a/tests/testing_data/validator/valid_data/data/rules.yml b/tests/testing_data/validator/valid_data/data/rules.yml
new file mode 100644
index 000000000..cf219f4e7
--- /dev/null
+++ b/tests/testing_data/validator/valid_data/data/rules.yml
@@ -0,0 +1,25 @@
+rules:
+- rule: ask the user to rephrase whenever they send a message with low nlu confidence
+  steps:
+  - intent: nlu_fallback
+  - action: utter_please_rephrase
+
+- rule: Only say `hello` if the user provided a location
+  condition:
+  - slot_was_set:
+    - location: true
+  steps:
+  - intent: greet
+  - action: utter_greet
+
+- rule: Say `hello` when the user starts a conversation with intent `greet`
+  conversation_start: true
+  steps:
+  - intent: greet
+  - action: utter_greet
+
+- rule: Rule which will not wait for user message once it was applied
+  steps:
+  - intent: greet
+  - action: utter_greet
+  wait_for_user_input: false
diff --git a/tests/testing_data/validator/valid_data/data/stories.yml b/tests/testing_data/validator/valid_data/data/stories.yml
new file mode 100644
index 000000000..dbdc9c0fb
--- /dev/null
+++ b/tests/testing_data/validator/valid_data/data/stories.yml
@@ -0,0 +1,14 @@
+version: "3.1"
+stories:
+- story: greet
+  steps:
+  - intent: greet
+  - action: utter_greet
+  - action: action_say_hello
+  - action: action_restart
+- story: say goodbye
+  steps:
+  - intent: deny
+  - action: utter_goodbye
+  - action: action_say_goodbye
+  - action: action_restart
diff --git a/tests/testing_data/validator/valid_data/domain.yml b/tests/testing_data/validator/valid_data/domain.yml
new file mode 100644
index 000000000..b59bba2be
--- /dev/null
+++ b/tests/testing_data/validator/valid_data/domain.yml
@@ -0,0 +1,26 @@
+version: "3.1"
+config:
+  store_entities_as_slots: true
+session_config:
+  session_expiration_time: 60
+  carry_over_slots_to_new_session: true
+intents:
+- greet:
+    use_entities: true
+- deny:
+    use_entities: true
+responses:
+  utter_goodbye:
+  - text: Bye
+  utter_greet:
+  - text: Hey! How are you?
+  utter_default:
+    - text: Can you rephrase!
+  utter_please_rephrase:
+    - text: I'm sorry, I didn't quite understand that. Could you rephrase?
+
+actions:
+- action_say_hello
+- action_say_goodbye
+- utter_greet
+- utter_goodbye
diff --git a/tests/unit_test/events/events_test.py b/tests/unit_test/events/events_test.py
index e92f040c6..10daa0bf8 100644
--- a/tests/unit_test/events/events_test.py
+++ b/tests/unit_test/events/events_test.py
@@ -256,9 +256,9 @@ def _path(*args, **kwargs):
         assert 'deny' in processor.fetch_intents(bot)
         assert len(processor.fetch_stories(bot)) == 2
         assert len(list(processor.fetch_training_examples(bot))) == 7
-        assert len(list(processor.fetch_responses(bot))) == 4
+        assert len(list(processor.fetch_responses(bot))) == 3
         assert len(processor.fetch_actions(bot)) == 2
-        assert len(processor.fetch_rule_block_names(bot)) == 4
+        assert len(processor.fetch_rule_block_names(bot)) == 3
 
     def test_trigger_data_importer_validate_and_save_append(self, monkeypatch):
         bot = 'test_events'
@@ -302,7 +302,7 @@ def _path(*args, **kwargs):
         assert len(list(processor.fetch_training_examples(bot))) == 13
         assert len(list(processor.fetch_responses(bot))) == 6
         assert len(processor.fetch_actions(bot)) == 2
-        assert len(processor.fetch_rule_block_names(bot)) == 4
+        assert len(processor.fetch_rule_block_names(bot)) == 3
 
     def test_trigger_data_importer_validate_and_save_overwrite_same_user(self, monkeypatch):
         bot = 'test_events'
@@ -338,9 +338,9 @@ def _path(*args, **kwargs):
         assert 'deny' in processor.fetch_intents(bot)
         assert len(processor.fetch_stories(bot)) == 2
         assert len(list(processor.fetch_training_examples(bot))) == 7
-        assert len(list(processor.fetch_responses(bot))) == 4
+        assert len(list(processor.fetch_responses(bot))) == 3
         assert len(processor.fetch_actions(bot)) == 2
-        assert len(processor.fetch_rule_block_names(bot)) == 4
+        assert len(processor.fetch_rule_block_names(bot)) == 3
 
     @responses.activate
     def test_trigger_data_importer_validate_event(self, monkeypatch):
@@ -836,9 +836,48 @@ def _path(*args, **kwargs):
         mongo_processor = MongoProcessor()
         assert len(mongo_processor.fetch_stories(bot)) == 3
         assert len(list(mongo_processor.fetch_training_examples(bot))) == 21
-        assert len(list(mongo_processor.fetch_responses(bot))) == 14
+        assert len(list(mongo_processor.fetch_responses(bot))) == 12
         assert len(mongo_processor.fetch_actions(bot)) == 0
-        assert len(mongo_processor.fetch_rule_block_names(bot)) == 1
+        assert len(mongo_processor.fetch_rule_block_names(bot)) == 0
+
+    def test_trigger_data_importer_with_valid_data(self, monkeypatch):
+        """Import the valid_data fixture (its rules.yml has an nlu_fallback rule) and check the saved counts."""
+        bot = 'test_events_with_valid_data'
+        user = 'test'
+        test_data_path = os.path.join(pytest.tmp_dir, str(uuid.uuid4()))
+        shutil.copytree('tests/testing_data/validator/valid_data', test_data_path)
+
+        def _path(*args, **kwargs):
+            return test_data_path
+
+        monkeypatch.setattr(Utility, "get_latest_file", _path)
+
+        DataImporterLogProcessor.add_log(bot, user,
+                                         files_received=REQUIREMENTS - {"http_actions", "chat_client_config"})
+        TrainingDataImporterEvent(bot, user, import_data=True, overwrite=False).execute()
+        logs = list(DataImporterLogProcessor.get_logs(bot))
+        assert len(logs) == 1
+        assert not logs[0].get('intents').get('data')
+        assert not logs[0].get('stories').get('data')
+        assert not logs[0].get('utterances').get('data')
+        assert [action.get('data') for action in logs[0].get('actions') if action.get('type') == 'http_actions']
+        assert not logs[0].get('training_examples').get('data')
+        assert not logs[0].get('domain').get('data')
+        assert not logs[0].get('config').get('data')
+        assert not logs[0].get('exception')
+        assert logs[0]['start_timestamp']
+        assert logs[0]['end_timestamp']
+        assert logs[0]['status'] == 'Success'
+        assert logs[0]['event_status'] == EVENT_STATUS.COMPLETED.value
+
+        processor = MongoProcessor()
+        assert 'greet' in processor.fetch_intents(bot)
+        assert 'deny' in processor.fetch_intents(bot)
+        assert len(processor.fetch_stories(bot)) == 2
+        assert len(list(processor.fetch_training_examples(bot))) == 7
+        assert len(list(processor.fetch_responses(bot))) == 4
+        assert len(processor.fetch_actions(bot)) == 2
+        assert len(processor.fetch_rule_block_names(bot)) == 4
 
     def test_trigger_faq_importer_validate_only(self, monkeypatch):
         def _mock_execution(*args, **kwargs):
diff --git a/tests/unit_test/validator/data_importer_test.py b/tests/unit_test/validator/data_importer_test.py
index 3a7e63729..781670a77 100644
--- a/tests/unit_test/validator/data_importer_test.py
+++ b/tests/unit_test/validator/data_importer_test.py
@@ -112,6 +112,27 @@ async def test_import_data(self):
         await importer.validate()
         importer.import_data()
 
+        processor = MongoProcessor()
+        assert 'greet' in processor.fetch_intents(bot)
+        assert 'deny' in processor.fetch_intents(bot)
+        assert len(processor.fetch_stories(bot)) == 2
+        assert len(list(processor.fetch_training_examples(bot))) == 7
+        assert len(list(processor.fetch_responses(bot))) == 3
+        assert len(processor.fetch_actions(bot)) == 2
+        assert len(processor.fetch_rule_block_names(bot)) == 3
+
+    @pytest.mark.asyncio
+    async def test_import_data_with_valid_data(self):
+        path = 'tests/testing_data/validator/valid_data'
+        bot = 'test_data_import_with_valid_data'
+        user = 'test'
+        test_data_path = os.path.join(pytest.tmp_dir, str(uuid.uuid4()))
+        shutil.copytree(path, test_data_path)
+        importer = DataImporter(test_data_path, bot, user,
+                                REQUIREMENTS - {"http_actions", "chat_client_config"}, True, True)
+        await importer.validate()
+        importer.import_data()
+
         processor = MongoProcessor()
         assert 'greet' in processor.fetch_intents(bot)
         assert 'deny' in processor.fetch_intents(bot)
@@ -138,9 +159,9 @@ async def test_import_data_with_multiflow(self):
         assert 'deny' in processor.fetch_intents(bot)
         assert len(processor.fetch_stories(bot)) == 2
         assert len(list(processor.fetch_training_examples(bot))) == 17
-        assert len(list(processor.fetch_responses(bot))) == 8
+        assert len(list(processor.fetch_responses(bot))) == 7
         assert len(processor.fetch_actions(bot)) == 3
-        assert len(processor.fetch_rule_block_names(bot)) == 4
+        assert len(processor.fetch_rule_block_names(bot)) == 3
         assert len(processor.fetch_multiflow_stories(bot)) == 2
 
     @pytest.mark.asyncio
@@ -164,7 +185,7 @@ async def test_import_data_append(self):
         assert len(list(processor.fetch_training_examples(bot))) == 13
         assert len(list(processor.fetch_responses(bot))) == 6
         assert len(processor.fetch_actions(bot)) == 2
-        assert len(processor.fetch_rule_block_names(bot)) == 4
+        assert len(processor.fetch_rule_block_names(bot)) == 3
 
     @pytest.mark.asyncio
     async def test_import_data_dont_save(self):
@@ -188,7 +209,7 @@ async def test_import_data_dont_save(self):
         assert len(list(processor.fetch_training_examples(bot))) == 13
         assert len(list(processor.fetch_responses(bot))) == 6
         assert len(processor.fetch_actions(bot)) == 2
-        assert len(processor.fetch_rule_block_names(bot)) == 4
+        assert len(processor.fetch_rule_block_names(bot)) == 3
 
         assert len(processor.fetch_intents(bot_2)) == 0
         assert len(processor.fetch_stories(bot_2)) == 0