From f37743f713a90ec9b92f0d1ba130c52dc61bbe9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Carlos=20Jos=C3=A9=20Camacho?= <jcjc712@hotmail.com>
Date: Mon, 13 Nov 2023 10:19:11 -0600
Subject: [PATCH] Dh 4983 (#244)

* [DH-4983] Document response endpoint in Readme

* Document in read the docs the CSV download feature
---
 README.md                               | 17 ++++++++++
 dataherald/api/fastapi.py               |  3 ++
 docs/api.add_responses.rst              | 29 ++++++++++++++--
 docs/api.create_database_connection.rst | 29 +++++++++++++++-
 docs/api.get_response_file.rst          | 44 +++++++++++++++++++++++++
 docs/api.list_instructions.rst          |  2 +-
 docs/api.question.rst                   | 19 +++++++++++
 docs/api.rst                            |  2 ++
 8 files changed, 140 insertions(+), 5 deletions(-)
 create mode 100644 docs/api.get_response_file.rst
diff --git a/README.md b/README.md
index 88898907..397ae325 100644
--- a/README.md
+++ b/README.md
@@ -403,6 +403,23 @@ curl -X 'POST' \
     }'
 ```
 
+### Create new response based on a previously created question
+After using the `questions` endpoint, you can generate a new response associated with a specific `question_id`.
+You can modify the `sql_query` to produce an alternative `sql_query_result` and a distinct response. If you do not
+specify a `sql_query`, the system will reprocess the question to generate the `sql_query`, execute it to obtain the `sql_query_result`, and then
+generate the response.
+
+```
+curl -X 'POST' \
+  '<host>/api/v1/responses?run_evaluator=true&sql_response_only=false&generate_csv=false' \
+  -H 'accept: application/json' \
+  -H 'Content-Type: application/json' \
+  -d '{
+  "question_id": "11fa1e419fe93d137536fe99",
+  "sql_query": "select * from sales order by created_at DESC limit 10"
+}'
+```
+
 ### Run scripts
 Within the `scripts` folder located inside the `dataherald` directory, you have the ability to upgrade your versions. 
 For instance, if you are currently using version 0.0.3 and wish to switch to version 0.0.4, simply execute the following command:
diff --git a/dataherald/api/fastapi.py b/dataherald/api/fastapi.py
index bf99cb8d..c520c340 100644
--- a/dataherald/api/fastapi.py
+++ b/dataherald/api/fastapi.py
@@ -492,6 +492,9 @@ def create_response(
         question_repository = QuestionRepository(self.storage)
         response_repository = ResponseRepository(self.storage)
         user_question = question_repository.find_by_id(query_request.question_id)
+        if not user_question:
+            raise HTTPException(status_code=404, detail="Question not found")
+
         db_connection_repository = DatabaseConnectionRepository(self.storage)
         database_connection = db_connection_repository.find_by_id(
             user_question.db_connection_id
diff --git a/docs/api.add_responses.rst b/docs/api.add_responses.rst
index ae4211ba..bc8b511f 100644
--- a/docs/api.add_responses.rst
+++ b/docs/api.add_responses.rst
@@ -1,8 +1,11 @@
 Create a new response
 =============================
 
-Once you made a question you can try sending a new sql query to improve the response, this creates a new
-`response` resource related to the `question` resource.
+After using the `questions` endpoint, you can generate a new `response`
+associated with a specific `question_id`. You can modify the `sql_query` to produce an alternative
+`sql_query_result` and a distinct response. If you do not specify a `sql_query`,
+the system will reprocess the question to generate the `sql_query`, execute it to obtain the `sql_query_result`,
+and then generate the response.
 
 Request this ``POST`` endpoint::
 
@@ -14,9 +17,27 @@ Request this ``POST`` endpoint::
 
     {
       "question_id": "string", # required
-      "sql_query": "string" # required
+      "sql_query": "string" # optional
     }
 
+**Parameters**
+
+.. csv-table::
+   :header: "Name", "Type", "Description"
+   :widths: 20, 20, 60
+
+   "run_evaluator", "boolean", "If True it evaluates the generated `sql_query` and `sql_query_result`, ``Optional``"
+   "sql_response_only", "boolean", "If True it only runs the SQL and returns the `sql_query_result`, ``Optional``"
+   "generate_csv", "boolean", "If True and the result has more than 50 rows, `sql_query_result` is returned as NULL and a CSV file is generated, ``Optional``"
+
+If the generate_csv flag is set to True, and the sql_query_result contains more than 50 rows, the system will utilize either
+the S3 credentials specified in the environment variables or those configured within the db_connection to generate the CSV file.
+The resulting file path will be structured as follows:
+
+.. code-block:: rst
+
+    "csv_file_path": "s3://k2-core/c6ddccfc-f355-4477-a2e7-e43f77e31bbb.csv"
+
 **Responses**
 
 HTTP 201 code response
@@ -39,6 +60,7 @@ HTTP 201 code response
           {}
         ]
       },
+      "csv_file_path": "string",
       "sql_generation_status": "NONE",
       "error_message": "string",
       "exec_time": 0,
@@ -95,6 +117,7 @@ HTTP 201 code response
           }
         ]
       },
+      "csv_file_path": null,
       "sql_generation_status": "VALID",
       "error_message": null,
       "exec_time": 37.183526277542114,
diff --git a/docs/api.create_database_connection.rst b/docs/api.create_database_connection.rst
index 1d2143bf..21ab53ab 100644
--- a/docs/api.create_database_connection.rst
+++ b/docs/api.create_database_connection.rst
@@ -10,6 +10,7 @@ You can include the API key for the LLM in the request body as an optional param
 
 You can find additional details on how to connect to each of the supported data warehouses :ref:`below <Supported Data warehouses>`.
 
+You have the flexibility to configure your own file storage service credentials, such as AWS S3, to manage the storage of all CSV files generated by the /questions or /responses endpoints.
 
 **Request this POST endpoint**::
 
@@ -35,7 +36,14 @@ You can find additional details on how to connect to each of the supported data
       "remote_db_password": "string",
       "private_key_password": "string",
       "db_driver": "string"
-    }
+    },
+    "file_storage": {
+      "name": "string",
+      "access_key_id": "string",
+      "secret_access_key": "string",
+      "region": "string",
+      "bucket": "string"
+    }
   }
 
 **SSH Parameters**
@@ -55,6 +63,18 @@ You can find additional details on how to connect to each of the supported data
     "private_key_password", "string", "The password for the id_rsa private key file, if it is password-protected"
     "db_driver", "string", "Set the database driver. For example, for PostgreSQL, the driver should be set to `postgresql+psycopg2`"
 
+**File Storage Parameters**
+
+.. csv-table::
+   :header: "Name", "Type", "Description"
+   :widths: 20, 20, 60
+
+    "name", "string", "The file storage service name. Set it to S3; this field is required"
+    "access_key_id", "string", "Your AWS access key, it is encrypted internally"
+    "secret_access_key", "string", "Your AWS secret access key, it is encrypted internally"
+    "region", "string", "Your bucket region"
+    "bucket", "string", "Your bucket name"
+
 **Responses**
 
 HTTP 201 code response
@@ -78,6 +98,13 @@ HTTP 201 code response
         "remote_db_password": "gAAAAABk8lHQpZyZ6ow8EuYPWe5haP-roQbBWkZn3trLgdO632IDoKcXAW-8yjzDDQ4uH03iWFzEgJq8HRxkJTC6Ht7Qrlz2PQ==",
         "private_key_password": "gAAAAABk8lHQWilFpIbCADvunHGYFMqgoPKIml_WRXf5Yuowqng28DVsq6-sChl695y5D_mWrr1I3hcJCZqkmhDqpma6iz3PKA==",
         "db_driver": "string"
+      },
+      "file_storage": {
+        "name": "S3",
+        "access_key_id": "gAAAAABk8lHQAaaSuoUKxddkMHw7jerwFmUeiE3hL6si06geRt8CV-r43fbckZjI6LbIULWPZ4HlQUF9_YpfaYfM6FarQbhDUQ==",
+        "secret_access_key": "gAAAAABk8lHQAaaSuoUKxddkMHw7jerwFmUeiE3hL6si06geRt8CV-r43fbckZjI6LbIULWPZ4HlQUF9_YpfaYfM6FarQbhDUQ==",
+        "region": "us-east-1",
+        "bucket": "my-bucket"
       }
     }
 
diff --git a/docs/api.get_response_file.rst b/docs/api.get_response_file.rst
new file mode 100644
index 00000000..fb7b98fd
--- /dev/null
+++ b/docs/api.get_response_file.rst
@@ -0,0 +1,44 @@
+Get a response file
+=============================
+
+After configuring your S3 credentials, either through environment variables or within the db_connection endpoint, and
+enabling the generate_csv flag in ``POST`` requests to the ``/questions`` or ``/responses`` endpoints, you can use this
+endpoint to get the content of a generated CSV file.
+
+Request this ``GET`` endpoint::
+
+   /api/v1/responses/{response_id}/file
+
+**Parameters**
+
+.. csv-table::
+   :header: "Name", "Type", "Description"
+   :widths: 20, 20, 60
+
+   "response_id", "string", "The response id, ``Required``"
+
+**Responses**
+
+HTTP 200 code response
+
+.. code-block:: rst
+
+    The file content
+
+**Request example**
+
+.. code-block:: rst
+
+   curl -X 'GET' \
+     '<localhost>/api/v1/responses/64c424fa3f4036441e882352/file' \
+     -H 'accept: application/json' \
+     -H 'Content-Type: application/json'
+
+**Response example**
+
+.. code-block:: rst
+
+    customer,sales
+    Foo,12.0
+    Bar,39
+    ...
diff --git a/docs/api.list_instructions.rst b/docs/api.list_instructions.rst
index f6e4a06e..3e83e050 100644
--- a/docs/api.list_instructions.rst
+++ b/docs/api.list_instructions.rst
@@ -9,7 +9,7 @@ Request this ``GET`` endpoint::
 
     GET /api/v1/instructions
 
-** Parameters **
+**Parameters**
 
 .. csv-table::
    :header: "Name", "Type", "Description"
diff --git a/docs/api.question.rst b/docs/api.question.rst
index 31a2f6bf..ed50fb92 100644
--- a/docs/api.question.rst
+++ b/docs/api.question.rst
@@ -17,6 +17,23 @@ Request this ``POST`` endpoint::
       "question": "string"
     }
 
+**Parameters**
+
+.. csv-table::
+   :header: "Name", "Type", "Description"
+   :widths: 20, 20, 60
+
+   "run_evaluator", "boolean", "If True it evaluates the generated `sql_query` and `sql_query_result`, ``Optional``"
+   "generate_csv", "boolean", "If True and the result has more than 50 rows, `sql_query_result` is returned as NULL and a CSV file is generated, ``Optional``"
+
+If the generate_csv flag is set to True, and the sql_query_result contains more than 50 rows, the system will utilize either
+the S3 credentials specified in the environment variables or those configured within the db_connection to generate the CSV file.
+The resulting file path will be structured as follows:
+
+.. code-block:: rst
+
+    "csv_file_path": "s3://k2-core/c6ddccfc-f355-4477-a2e7-e43f77e31bbb.csv"
+
 **Responses**
 
 HTTP 201 code response
@@ -39,6 +56,7 @@ HTTP 201 code response
           {}
         ]
       },
+      "csv_file_path": "string",
       "sql_generation_status": "NONE",
       "error_message": "string",
       "exec_time": 0,
@@ -85,6 +103,7 @@ HTTP 201 code response
           }
         ]
       },
+      "csv_file_path": null,
       "sql_generation_status": "VALID",
       "error_message": null,
       "exec_time": 37.183526277542114,
diff --git a/docs/api.rst b/docs/api.rst
index 554fc2f9..0072039a 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -54,6 +54,7 @@ The related endpoints are:
 * :doc:`add_responses <api.add_responses>` -- ``POST api/v1/responses``
 * :doc:`list_responses <api.list_responses>` -- ``GET api/v1/responses``
 * :doc:`get_response <api.get_response>` -- ``GET api/v1/responses/{response_id}``
+* :doc:`get_response_file <api.get_response_file>` -- ``GET api/v1/responses/{response_id}/file``
 
 **Response resource example:**
 
@@ -156,3 +157,4 @@ Related endpoints are:
     api.add_responses
     api.list_responses
     api.get_response
+    api.get_response_file