Merge pull request #778 from aws-solutions/feature/v6.1.4

Update to version v6.1.4
aws-solutions · Oct 31, 2024 · 9d4f54e · 9d4f54e
2 parents 44d70aa + 6b34b41
commit 9d4f54e
Show file tree

Hide file tree

Showing 65 changed files with 457 additions and 196 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [6.1.4] - 2024-10-31
+
+### Fixed
+- PII usage leaks and improvements. See [README](./source/docs/PII_Detection_And_Redaction/README.md).
+
+### Security
+- Patched http-proxy-middleware vulnerability
+
 ## [6.1.3] - 2024-10-17
 
 ### Security

diff --git a/README.md b/README.md
@@ -336,6 +336,7 @@ As QnABot evolves over the years, it makes use of various services and functiona
 _Note: **Deployable solution versions** refers to the ability to deploy the version of QnABot in their AWS accounts. **Actively supported versions** for QnABot is only available for the latest version of QnABot._
 
 ### Deployable Versions
+- [v6.1.4](https://github.com/aws-solutions/qnabot-on-aws/releases/tag/v6.1.4) - [Public](https://solutions-reference.s3.amazonaws.com/qnabot-on-aws/v6.1.4/qnabot-on-aws-main.template)/[VPC](https://solutions-reference.s3.amazonaws.com/qnabot-on-aws/v6.1.4/qnabot-on-aws-vpc.template)
 - [v6.1.3](https://github.com/aws-solutions/qnabot-on-aws/releases/tag/v6.1.3) - [Public](https://solutions-reference.s3.amazonaws.com/qnabot-on-aws/v6.1.3/qnabot-on-aws-main.template)/[VPC](https://solutions-reference.s3.amazonaws.com/qnabot-on-aws/v6.1.3/qnabot-on-aws-vpc.template)
 - [v6.1.2](https://github.com/aws-solutions/qnabot-on-aws/releases/tag/v6.1.2) - [Public](https://solutions-reference.s3.amazonaws.com/qnabot-on-aws/v6.1.2/qnabot-on-aws-main.template)/[VPC](https://solutions-reference.s3.amazonaws.com/qnabot-on-aws/v6.1.2/qnabot-on-aws-vpc.template)
 - [v6.1.1](https://github.com/aws-solutions/qnabot-on-aws/releases/tag/v6.1.1) - [Public](https://solutions-reference.s3.amazonaws.com/qnabot-on-aws/v6.1.1/qnabot-on-aws-main.template)/[VPC](https://solutions-reference.s3.amazonaws.com/qnabot-on-aws/v6.1.1/qnabot-on-aws-vpc.template)

diff --git a/source/cli/aws_solutions/qnabot/cli/qnabot_cli.py b/source/cli/aws_solutions/qnabot/cli/qnabot_cli.py
@@ -15,7 +15,7 @@
 @click.pass_context
 def cli(ctx) -> None:
     os.environ["SOLUTION_ID"] = "SO0189"
-    os.environ["SOLUTION_VERSION"] = "v6.1.3"
+    os.environ["SOLUTION_VERSION"] = "v6.1.4"
 
 
 @cli.command("import")

diff --git a/source/docs/PII_Detection_And_Redaction/README.md b/source/docs/PII_Detection_And_Redaction/README.md
@@ -1,37 +1,40 @@
-# Personally Identifiable Information (PII) Rejection and Redaction
+# Personally Identifiable Information (PII) Redaction and Rejection in QnABot
 
-QnABot can now detect and redact Personally Identifiable Information (PII) using [Amazon Comprehend](https://docs.aws.amazon.com/comprehend/latest/dg/how-pii.html) and regular expressions.
-
-If ENABLE_REDACTING is set to "true", the Comprehend detected PII entities will also be redacted from Amazon CloudWatch logs and Amazon Opensearch logs.
-
-![settings image](./images/settings.png)
+QnABot now offers PII handling capabilities such as redaction and rejection of sensitive information using Amazon Comprehend and regular expressions. The system can be configured to redact PII from CloudWatch logs, S3 and OpenSearch Dashboard, as well as reject an input containing PII. QnABot administrators can fine-tune the behavior through various settings, including confidence thresholds, specific PII entity types, and custom regex patterns. An optional feature allows for redaction of information in CloudWatch logs and feedback/metrics sent to S3 [MetricsBucket](../Technical%20Information.md) and OpenSearch Dashboard. These features provide a comprehensive solution for managing PII, improving privacy protection and regulatory compliance.
+> **_NOTE:_** 
+These settings are disabled by default. QnABot administrators can customize these features through the Content Designer UI Settings.
 
 |Setting | Type of Value | Description |
 --------|---------------|-------------|
-| ENABLE_REDACTING | true or false | Enable the system to redact log output
-| REDACTING_REGEX | regex expression | Redacts expressions matching regex from logs
-| ENABLE_REDACTING_WITH_COMPREHEND | true or false | Enables [Amazon Comprehend based PII Redacting](https://aws.amazon.com/blogs/machine-learning/detecting-and-redacting-pii-using-amazon-comprehend/)
-| COMPREHEND_REDACTING_CONFIDENCE_SCORE | number (0 to 0.99) | Only redact PII where Amazon Comprehend's confidence score is greater than this number
+| ENABLE_REDACTING | true or false | Enables or disables the system's ability to redact log output using REDACTING_REGEX.
+| REDACTING_REGEX | regex expression | Defines patterns to be redacted from logs when ENABLE_REDACTING is true.
+| ENABLE_REDACTING_WITH_COMPREHEND | true or false | Enables PII Redaction using [Amazon Comprehend](https://aws.amazon.com/blogs/machine-learning/detecting-and-redacting-pii-using-amazon-comprehend/)
+| COMPREHEND_REDACTING_CONFIDENCE_SCORE | number (0 to 0.99) | Sets a threshold for PII redaction. Only PII detected with Amazon Comprehend's confidence score higher than this value will be redacted.
 | COMPREHEND_REDACTING_ENTITY_TYPES | comma separated list of [PII Entity Categories](https://aws.amazon.com/blogs/machine-learning/detecting-and-redacting-pii-using-amazon-comprehend/) | Only recognize PII entity types in the list for redaction
-| PII_REJECTION_ENABLED | true or false | Enables PII Rejection
-| PII_REJECTION_QUESTION | text  | If PII is found, the user's request (question) will change to this phrase
-| PII_REJECTION_CONFIDENCE_SCORE | number (0 to 0.99) | Only reject PII where Amazon Comprehend's confidence score is greater than this number
-| PII_REJECTION_REGEX | regex expression | Used to find PII based on a regex
+| PII_REJECTION_ENABLED | true or false | Enables or disables the system's ability to reject input containing PII. If you enable PII rejection, it is recommended to also enable PII redaction by setting ENABLE_REDACTING and/or ENABLE_REDACTING_WITH_COMPREHEND to true.
+| PII_REJECTION_QUESTION | text  |  If PII rejection is enabled and PII is detected, the user's original question will be replaced with this text.
+| PII_REJECTION_REGEX | regex expression | Defines patterns to identify PII for rejection purposes.
+| PII_REJECTION_CONFIDENCE_SCORE | number (0 to 0.99) | Sets a threshold for PII rejection. Only PII detected with Amazon Comprehend's confidence score higher than this value will trigger rejection.
 | PII_REJECTION_ENTITY_TYPES | comma separated list of [PII Entity Categories](https://aws.amazon.com/blogs/machine-learning/detecting-and-redacting-pii-using-amazon-comprehend/) | Only recognize PII entity types in the list
 | DISABLE_CLOUDWATCH_LOGGING | true or false | Disable all logging in fulfillment es query handler lambda. does not disable logging from Lambda Hooks or Conditional Chaining Lambda functions
 
-# Optional Redact feature for log and metric output
+## Additional information on regex in settings REDACTING_REGEX and PII_REJECTION_REGEX
+
+QnABot offers a configurable, cost-effective PII detection feature using regular expressions. When enabled via the Designer UI Settings, this feature detects PII patterns defined by the regex and takes action based on the settings listed in previous sections. Administrators can customize the RegEx patterns to suit their specific PII detection needs.
 
-QnABot can be configured to redact information written to CloudWatch logs, S3 metrics, and OpenSearch Dashboards metrics logs.
-This feature is disabled by default. Use the Designer UI Settings form to enable this feature. One can configure
-the RegEx applied to strings as they are logged. If RegEx matches are found, the match is replaced with the string
-'XXXXXX'.
-
-The initial RegEx is
+The default RegEx:
 
 ```regex
 \b\d{4}\b(?![-])|\b\d{9}\b|\b\d{3}-\d{2}-\d{4}\b
 ```
 
 This replaces 4 digit numbers not followed by a hyphen, a 9 digit number (SSN without hyphens), and a typical
-SSN using nnn-nn-nnnn syntax with hyphens.
+SSN using nnn-nn-nnnn syntax with hyphens.
+
+An alternative RegEx to evaluate:
+
+```regex
+\b\d{4}\b(?![-])|\b\d{9}\b|\b\d{3}-\d{2}-\d{4}\b|\b\d{4}(-\d{4}){3}\b
+```
+
+The default regex focuses on catching SSNs and other potentially sensitive 4-digit or 9-digit numbers. The alternative regex matches the same patterns and additionally catches debit/credit card numbers written in the common nnnn-nnnn-nnnn-nnnn format.
diff --git a/source/lambda/aws-sdk-layer/package-lock.json b/source/lambda/aws-sdk-layer/package-lock.json
diff --git a/source/lambda/aws-sdk-layer/package.json b/source/lambda/aws-sdk-layer/package.json
@@ -1,6 +1,6 @@
 {
     "name": "aws-layer",
-    "version": "6.1.3",
+    "version": "6.1.4",
     "description": "QnABot Lambda aws-sdk-layer",
     "main": "index.js",
     "scripts": {

diff --git a/source/lambda/cfn-lambda-layer/package-lock.json b/source/lambda/cfn-lambda-layer/package-lock.json
diff --git a/source/lambda/cfn-lambda-layer/package.json b/source/lambda/cfn-lambda-layer/package.json
@@ -1,6 +1,6 @@
 {
     "name": "cfn-lambda-layer",
-    "version": "6.1.3",
+    "version": "6.1.4",
     "description": "QnABot Cfn Lambda Layer",
     "main": "index.js",
     "scripts": {

diff --git a/source/lambda/cfn/package-lock.json b/source/lambda/cfn/package-lock.json
diff --git a/source/lambda/cfn/package.json b/source/lambda/cfn/package.json
@@ -1,6 +1,6 @@
 {
     "name": "cfn",
-    "version": "6.1.3",
+    "version": "6.1.4",
     "description": "QnABot Cfn Lambda",
     "main": "index.js",
     "scripts": {

diff --git a/source/lambda/common-modules-layer/package-lock.json b/source/lambda/common-modules-layer/package-lock.json
diff --git a/source/lambda/common-modules-layer/package.json b/source/lambda/common-modules-layer/package.json
@@ -1,6 +1,6 @@
 {
     "name": "common-modules-layer",
-    "version": "6.1.3",
+    "version": "6.1.4",
     "description": "QnABot Common-modules-layer lambda",
     "main": "index.js",
     "scripts": {

diff --git a/source/lambda/connect/package-lock.json b/source/lambda/connect/package-lock.json
diff --git a/source/lambda/connect/package.json b/source/lambda/connect/package.json
@@ -1,6 +1,6 @@
 {
     "name": "connect",
-    "version": "6.1.3",
+    "version": "6.1.4",
     "description": "Lambda function used to support the Connect setup wizard",
     "repository": {
         "type": "git",

diff --git a/source/lambda/es-proxy-layer/lib/es-logging.js b/source/lambda/es-proxy-layer/lib/es-logging.js
@@ -11,23 +11,7 @@ const region = process.env.AWS_REGION || 'us-east-1';
 
 const qnabot = require('qnabot/logging');
 const qna_settings = require('qnabot/settings');
-
-function processKeysForRegEx(obj, re) {
-    Object.keys(obj).forEach((key, index) => {
-        const val = obj[key];
-        if (_.isPlainObject(val)) {
-            processKeysForRegEx(val, re);
-        } else if (key === 'slot') {
-            obj[key] = qnabot.redact_text(val);
-        } else if (key === 'recentIntentSummaryView') {
-            if (val) {
-                processKeysForRegEx(val, re);
-            }
-        } else if (typeof val === 'string') {
-            obj[key] = qnabot.redact_text(val);
-        }
-    });
-}
+const { processKeysForRedact } = require('./redactHelper');
 
 function stringifySessionAttribues(res) {
     const sessionAttrs = _.get(res, 'session', {});
@@ -38,7 +22,7 @@ function stringifySessionAttribues(res) {
     }
 }
 
-module.exports = function (event, context, callback) {
+module.exports = async function (event, context, callback) {
     // data to send to general metrics logging
     const date = new Date();
     const now = date.toISOString();
@@ -52,65 +36,66 @@ module.exports = function (event, context, callback) {
     stringifySessionAttribues(res);
 
     const redactEnabled = _.get(req, '_settings.ENABLE_REDACTING');
-    const redactRegex = _.get(req, '_settings.REDACTING_REGEX', '\\b\\d{4}\\b(?![-])|\\b\\d{9}\\b|\\b\\d{3}-\\d{2}-\\d{4}\\b');
+    const redactComprehendEnabled  =_.get(req, '_settings.ENABLE_REDACTING_WITH_COMPREHEND', false);
     const cloudwatchLoggingDisabled = _.get(req, '_settings.DISABLE_CLOUDWATCH_LOGGING');
 
     qna_settings.set_environment_variables(req._settings);
-    qnabot.setPIIRedactionEnvironmentVars(
+    await qnabot.setPIIRedactionEnvironmentVars(
         req._event.inputTranscript,
         _.get(req, '_settings.ENABLE_REDACTING_WITH_COMPREHEND', false),
         _.get(req, '_settings.REDACTING_REGEX', ''),
         _.get(req, '_settings.COMPREHEND_REDACTING_ENTITY_TYPES', ''),
         _.get(req, '_settings.COMPREHEND_REDACTING_CONFIDENCE_SCORE', 0.99),
-    ).then(async () => {
-        if (cloudwatchLoggingDisabled) {
-            qnabot.log('RESULT', 'cloudwatch logging disabled');
-        } else if (redactEnabled) {
-            qnabot.log('redact enabled');
-            const re = new RegExp(redactRegex, 'g');
-            processKeysForRegEx(req, re);
-            processKeysForRegEx(res, re);
-            processKeysForRegEx(sessionAttributes, re);
-            qnabot.log('RESULT', event);
-        } else {
-            qnabot.log('RESULT', event);
-        }
+    )
 
-        // constructing the object to be logged in OpenSearch (to visualize in OpenSearchDashboards)
-        const jsonData = {
-            entireRequest: req,
-            entireResponse: res,
-            qid: _.get(res.result, 'qid'),
-            utterance: String(req.question).toLowerCase().replace(/[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~]/g, ''),
-            answer: _.get(res, 'message'),
-            topic: _.get(res.result, 't', ''),
-            session: sessionAttributes,
-            clientType: req._clientType,
-            tags: _.get(res, 'tags', ''),
-            datetime: now,
-        };
+    if (cloudwatchLoggingDisabled) {
+        processKeysForRedact(res, false);
+        qnabot.log('RESULT', 'cloudwatch logging disabled');
+    } else if (redactEnabled || redactComprehendEnabled) {
+        processKeysForRedact(req, true);
+        processKeysForRedact(res, true);
+        processKeysForRedact(sessionAttributes, true);
+        qnabot.log('REDACTED RESULT', JSON.stringify(event, null, 2));
+    } else {
+        processKeysForRedact(req, false);
+        processKeysForRedact(res, false);
+        processKeysForRedact(sessionAttributes, false);
+        qnabot.log('RESULT',  JSON.stringify(event, null, 2));
+    }
 
-        if (cloudwatchLoggingDisabled) {
-            jsonData.entireRequest = undefined;
-            jsonData.utterance = undefined;
-            jsonData.session = undefined;
-        }
-        // encode to base64 string to put into firehose and
-        // append new line for proper downstream kinesis processing in OpenSearchDashboards and/or athena queries over s3
-        const objJsonStr = `${JSON.stringify(jsonData)}\n`;
-        const firehose = new FirehoseClient(customSdkConfig('C009', { region }));
+    // constructing the object to be logged in OpenSearch (to visualize in OpenSearchDashboards)
+    const jsonData = {
+        entireRequest: req,
+        entireResponse: res,
+        qid: _.get(res.result, 'qid'),
+        utterance: String(req.question).toLowerCase().replace(/[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~]/g, ''),
+        answer: _.get(res, 'message'),
+        topic: _.get(res.result, 't', ''),
+        session: sessionAttributes,
+        clientType: req._clientType,
+        tags: _.get(res, 'tags', ''),
+        datetime: now,
+    };
 
-        const params = {
-            DeliveryStreamName: process.env.FIREHOSE_NAME, /* required */
-            Record: { /* required */
-                Data: Buffer.from(objJsonStr), /* Strings will be Base-64 encoded on your behalf */ /* required */
-            },
-        };
-        try {
-            const data = await firehose.send(new PutRecordCommand(params));
-            qnabot.debug(data)
-        } catch (err) {
-            qnabot.log('An error occurred in Firehose PutRecordCommand: ', err);
-        }
-    });
+    if (cloudwatchLoggingDisabled) {
+        jsonData.entireRequest = undefined;
+        jsonData.utterance = undefined;
+        jsonData.session = undefined;
+    };
+    // encode to base64 string to put into firehose and
+    // append new line for proper downstream kinesis processing in OpenSearchDashboards and/or athena queries over s3
+    const objJsonStr = `${JSON.stringify(jsonData)}\n`;
+    const firehose = new FirehoseClient(customSdkConfig('C009', { region }));
+    const params = {
+        DeliveryStreamName: process.env.FIREHOSE_NAME, /* required */
+        Record: { /* required */
+            Data: Buffer.from(objJsonStr), /* Strings will be Base-64 encoded on your behalf */ /* required */
+        },
+    };
+    try {
+        const res = await firehose.send(new PutRecordCommand(params));
+        qnabot.debug(`Firehose Response: ${JSON.stringify(res, null, 2)}`)
+    } catch (err) {
+        qnabot.log('An error occurred in Firehose PutRecordCommand: ', err);
+    };
 };
diff --git a/source/lambda/es-proxy-layer/lib/fulfillment-event/getHit.js b/source/lambda/es-proxy-layer/lib/fulfillment-event/getHit.js
@@ -5,6 +5,7 @@
 
 const _ = require('lodash');
 const qnabot = require('qnabot/logging');
+const qna_settings = require('qnabot/settings');
 const handlebars = require('../handlebars');
 const kendra_fallback = require('../kendra');
 const kendra_retrieve = require('../kendraRetrieve');
@@ -16,6 +17,7 @@ const { encryptor } = require('./encryptor');
 const { runLlmQa } = require('./runLlmQa');
 const { updateResWithHit } = require('./updateResWithHit');
 const { bedrockRetrieveAndGenerate } = require('../bedrock/bedrockAgents');
+const { processKeysForRedact } = require('../redactHelper');
 
 async function runQuery(req, query_params, kendraIndex) {
     query_params.kendraIndex = kendraIndex;
@@ -128,6 +130,20 @@ async function invokeLambdaHook(hit, req, res) {
     const lambdaHook = _.get(hit, 'l');
     if (lambdaHook) {
         qnabot.log('Invoking Lambda Hook function: ', lambdaHook);
+        const redactEnabled = _.get(req, '_settings.ENABLE_REDACTING');
+        const redactComprehendEnabled  =_.get(req, '_settings.ENABLE_REDACTING_WITH_COMPREHEND', false);
+        if (lambdaHook.toLowerCase().includes('feedback') && (redactEnabled || redactComprehendEnabled) ) {
+            qna_settings.set_environment_variables(req._settings);
+            await qnabot.setPIIRedactionEnvironmentVars(
+                req._event.inputTranscript,
+                _.get(req, '_settings.ENABLE_REDACTING_WITH_COMPREHEND', false),
+                _.get(req, '_settings.REDACTING_REGEX', ''),
+                _.get(req, '_settings.COMPREHEND_REDACTING_ENTITY_TYPES', ''),
+                _.get(req, '_settings.COMPREHEND_REDACTING_CONFIDENCE_SCORE', 0.99),
+            );
+            processKeysForRedact(req, true)
+            processKeysForRedact(res, true)
+        }
         [req, res] = await invokeLambda(lambdaHook, req, res);
         // update hit with values returned in res by lambda hook
         _.set(hit, 'a', _.get(res, 'message', ''));