diff --git a/aws/logs_monitoring/enhanced_lambda_metrics.py b/aws/logs_monitoring/enhanced_lambda_metrics.py index 002494131..ac76666f1 100644 --- a/aws/logs_monitoring/enhanced_lambda_metrics.py +++ b/aws/logs_monitoring/enhanced_lambda_metrics.py @@ -2,6 +2,7 @@ # under the Apache License Version 2.0. # This product includes software developed at Datadog (https://www.datadoghq.com/). # Copyright 2021 Datadog, Inc. +import json import os import logging import re @@ -60,6 +61,19 @@ INIT_DURATION_METRIC_NAME, ] +# Keys that appear in Lambda telemetry records emitted when JSON logs are enabled +MEMORY_ALLOCATED_RECORD_KEY = "memorySizeMB" +INIT_DURATION_RECORD_KEY = "initDurationMs" +DURATION_RECORD_KEY = "durationMs" +BILLED_DURATION_RECORD_KEY = "billedDurationMs" +MAX_MEMORY_USED_RECORD_KEY = "maxMemoryUsedMB" +RUNTIME_METRICS_BY_RECORD_KEY = { + # Except INIT_DURATION_RECORD_KEY which is handled separately + DURATION_RECORD_KEY: DURATION_METRIC_NAME, + BILLED_DURATION_RECORD_KEY: BILLED_DURATION_METRIC_NAME, + MAX_MEMORY_USED_RECORD_KEY: MAX_MEMORY_USED_METRIC_NAME, +} + # Multiply the duration metrics by 1/1000 to convert ms to seconds METRIC_ADJUSTMENT_FACTORS = { DURATION_METRIC_NAME: 0.001, @@ -202,8 +216,12 @@ def generate_enhanced_lambda_metrics(log, tags_cache): if not is_lambda_log: return [] + # Check if its Lambda lifecycle log that is emitted if log format is set to JSON + parsed_metrics = parse_metrics_from_json_report_log(log_message) + # Check if this is a REPORT log - parsed_metrics = parse_metrics_from_report_log(log_message) + if not parsed_metrics: + parsed_metrics = parse_metrics_from_report_log(log_message) # Check if this is a timeout if not parsed_metrics: @@ -254,6 +272,74 @@ def parse_lambda_tags_from_arn(arn): ] +def parse_metrics_from_json_report_log(log_message): + try: + body = json.loads(log_message) + except json.JSONDecodeError: + return [] + + stage = body.get("type", "") + record = body.get("record", {}) + record_metrics = record.get("metrics", {}) + + if stage != "platform.report" or not record_metrics: + return [] + + metrics = [] + + for record_key, metric_name in RUNTIME_METRICS_BY_RECORD_KEY.items(): + metric_point_value = record_metrics[record_key] + + if metric_name in METRIC_ADJUSTMENT_FACTORS: + metric_point_value *= METRIC_ADJUSTMENT_FACTORS[metric_name] + + metrics.append( + DatadogMetricPoint( + f"{ENHANCED_METRICS_NAMESPACE_PREFIX}.{metric_name}", + metric_point_value, + ) + ) + + tags = [ + f"{MEMORY_ALLOCATED_FIELD_NAME}:{record_metrics[MEMORY_ALLOCATED_RECORD_KEY]}" + ] + + init_duration = record_metrics.get(INIT_DURATION_RECORD_KEY) + if init_duration: + tags.append("cold_start:true") + metrics.append( + DatadogMetricPoint( + f"{ENHANCED_METRICS_NAMESPACE_PREFIX}.{INIT_DURATION_METRIC_NAME}", + init_duration * METRIC_ADJUSTMENT_FACTORS[INIT_DURATION_METRIC_NAME], + ) + ) + else: + tags.append("cold_start:false") + + metrics.append( + DatadogMetricPoint( + f"{ENHANCED_METRICS_NAMESPACE_PREFIX}.{ESTIMATED_COST_METRIC_NAME}", + calculate_estimated_cost( + record_metrics[BILLED_DURATION_RECORD_KEY], + record_metrics[MEMORY_ALLOCATED_RECORD_KEY], + ), + ) + ) + + if record.get("status") == "timeout": + metrics.append( + DatadogMetricPoint( + f"{ENHANCED_METRICS_NAMESPACE_PREFIX}.{TIMEOUTS_METRIC_NAME}", + 1.0, + ) + ) + + for metric in metrics: + metric.add_tags(tags) + + return metrics + + def parse_metrics_from_report_log(report_log_line): """Parses and returns metrics from the REPORT Lambda log diff --git a/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_generate_enhanced_lambda_metrics_json.approved.json b/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_generate_enhanced_lambda_metrics_json.approved.json new file mode 100644 index 000000000..0568c4875 --- /dev/null +++ b/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_generate_enhanced_lambda_metrics_json.approved.json @@ -0,0 +1,54 @@ +[ + { + "name": "aws.lambda.enhanced.duration", + "tags": [ + "memorysize:128", + "cold_start:false", + "region:us-east-1", + "account_id:172597598159", + "aws_account:172597598159", + "functionname:post-coupon-prod-us" + ], + "timestamp": 10000, + "value": 3.47065 + }, + { + "name": "aws.lambda.enhanced.billed_duration", + "tags": [ + "memorysize:128", + "cold_start:false", + "region:us-east-1", + "account_id:172597598159", + "aws_account:172597598159", + "functionname:post-coupon-prod-us" + ], + "timestamp": 10000, + "value": 3.5 + }, + { + "name": "aws.lambda.enhanced.max_memory_used", + "tags": [ + "memorysize:128", + "cold_start:false", + "region:us-east-1", + "account_id:172597598159", + "aws_account:172597598159", + "functionname:post-coupon-prod-us" + ], + "timestamp": 10000, + "value": 89 + }, + { + "name": "aws.lambda.enhanced.estimated_cost", + "tags": [ + "memorysize:128", + "cold_start:false", + "region:us-east-1", + "account_id:172597598159", + "aws_account:172597598159", + "functionname:post-coupon-prod-us" + ], + "timestamp": 10000, + "value": 7.49168125e-06 + } +] diff --git a/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_cold_start_json_report_log.approved.json b/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_cold_start_json_report_log.approved.json new file mode 100644 index 000000000..4694661ff --- /dev/null +++ b/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_cold_start_json_report_log.approved.json @@ -0,0 +1,47 @@ +[ + { + "name": "aws.lambda.enhanced.duration", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 0.0008100000000000001 + }, + { + "name": "aws.lambda.enhanced.billed_duration", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 0.1 + }, + { + "name": "aws.lambda.enhanced.max_memory_used", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 90 + }, + { + "name": "aws.lambda.enhanced.init_duration", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 1.234 + }, + { + "name": "aws.lambda.enhanced.estimated_cost", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 4.0833375e-07 + } +] diff --git a/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_json_report_log.approved.json b/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_json_report_log.approved.json new file mode 100644 index 000000000..faa119621 --- /dev/null +++ b/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_json_report_log.approved.json @@ -0,0 +1,38 @@ +[ + { + "name": "aws.lambda.enhanced.duration", + "tags": [ + "memorysize:128", + "cold_start:false" + ], + "timestamp": null, + "value": 0.00062 + }, + { + "name": "aws.lambda.enhanced.billed_duration", + "tags": [ + "memorysize:128", + "cold_start:false" + ], + "timestamp": null, + "value": 0.1 + }, + { + "name": "aws.lambda.enhanced.max_memory_used", + "tags": [ + "memorysize:128", + "cold_start:false" + ], + "timestamp": null, + "value": 51 + }, + { + "name": "aws.lambda.enhanced.estimated_cost", + "tags": [ + "memorysize:128", + "cold_start:false" + ], + "timestamp": null, + "value": 4.0833375e-07 + } +] diff --git a/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_timeout_json_report_log.approved.json b/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_timeout_json_report_log.approved.json new file mode 100644 index 000000000..252e82643 --- /dev/null +++ b/aws/logs_monitoring/tests/approved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_timeout_json_report_log.approved.json @@ -0,0 +1,56 @@ +[ + { + "name": "aws.lambda.enhanced.duration", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 30.0 + }, + { + "name": "aws.lambda.enhanced.billed_duration", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 30.0 + }, + { + "name": "aws.lambda.enhanced.max_memory_used", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 74 + }, + { + "name": "aws.lambda.enhanced.init_duration", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 0.985413 + }, + { + "name": "aws.lambda.enhanced.estimated_cost", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 6.270012500000001e-05 + }, + { + "name": "aws.lambda.enhanced.timeouts", + "tags": [ + "memorysize:128", + "cold_start:true" + ], + "timestamp": null, + "value": 1.0 + } +] diff --git a/aws/logs_monitoring/tests/test_enhanced_lambda_metrics.py b/aws/logs_monitoring/tests/test_enhanced_lambda_metrics.py index e10f12df0..d1366a276 100644 --- a/aws/logs_monitoring/tests/test_enhanced_lambda_metrics.py +++ b/aws/logs_monitoring/tests/test_enhanced_lambda_metrics.py @@ -1,3 +1,4 @@ +import json import unittest import os from time import time @@ -7,6 +8,7 @@ from enhanced_lambda_metrics import ( parse_metrics_from_report_log, + parse_metrics_from_json_report_log, parse_lambda_tags_from_arn, generate_enhanced_lambda_metrics, create_out_of_memory_enhanced_metric, @@ -32,6 +34,56 @@ class TestEnhancedLambdaMetrics(unittest.TestCase): "XRAY TraceId: 1-5d83c0ad-b8eb33a0b1de97d804fac890\tSegmentId: 31255c3b19bd3637\t" "Sampled: true" ) + standard_json_report = json.dumps( + { + "time": "2024-10-04T00:36:35.800Z", + "type": "platform.report", + "record": { + "requestId": "4d789d71-2f2c-4c66-a4b5-531a0223233d", + "metrics": { + "durationMs": 0.62, + "billedDurationMs": 100, + "memorySizeMB": 128, + "maxMemoryUsedMB": 51, + }, + "status": "success", + }, + } + ) + cold_start_json_report = json.dumps( + { + "time": "2024-10-04T00:36:35.800Z", + "type": "platform.report", + "record": { + "requestId": "4d789d71-2f2c-4c66-a4b5-531a0223233d", + "metrics": { + "durationMs": 0.81, + "billedDurationMs": 100, + "memorySizeMB": 128, + "maxMemoryUsedMB": 90, + "initDurationMs": 1234, + }, + "status": "success", + }, + } + ) + timeout_json_report = json.dumps( + { + "time": "2024-10-18T19:38:39.661Z", + "type": "platform.report", + "record": { + "requestId": "b441820a-ebe5-4d5f-93bc-f945707b0225", + "metrics": { + "durationMs": 30000.0, + "billedDurationMs": 30000, + "memorySizeMB": 128, + "maxMemoryUsedMB": 74, + "initDurationMs": 985.413, + }, + "status": "timeout", + }, + } + ) def test_parse_lambda_tags_from_arn(self): verify_as_json( @@ -57,6 +109,24 @@ def test_parse_metrics_from_report_with_xray(self): parsed_metrics = parse_metrics_from_report_log(self.report_with_xray) verify_as_json(parsed_metrics) + def test_parse_metrics_from_json_no_report(self): + # Ensure we ignore unrelated JSON logs + parsed_metrics = parse_metrics_from_json_report_log('{"message": "abcd"}') + assert parsed_metrics == [] + + def test_parse_metrics_from_json_report_log(self): + parsed_metrics = parse_metrics_from_json_report_log(self.standard_json_report) + # The timestamps are None because the timestamp is added after the metrics are parsed + verify_as_json(parsed_metrics) + + def test_parse_metrics_from_cold_start_json_report_log(self): + parsed_metrics = parse_metrics_from_json_report_log(self.cold_start_json_report) + verify_as_json(parsed_metrics) + + def test_parse_metrics_from_timeout_json_report_log(self): + parsed_metrics = parse_metrics_from_json_report_log(self.timeout_json_report) + verify_as_json(parsed_metrics) + def test_create_out_of_memory_enhanced_metric(self): go_out_of_memory_error = "fatal error: runtime: out of memory" self.assertEqual( @@ -88,6 +158,47 @@ def test_create_out_of_memory_enhanced_metric(self): success_message = "Success!" self.assertEqual(len(create_out_of_memory_enhanced_metric(success_message)), 0) + def test_generate_enhanced_lambda_metrics_json( + self, + ): + tags_cache = LambdaTagsCache("") + tags_cache.get = MagicMock(return_value=[]) + + logs_input = { + "message": json.dumps( + { + "time": "2024-10-04T00:36:35.800Z", + "type": "platform.report", + "record": { + "requestId": "4d789d71-2f2c-4c66-a4b5-531a0223233d", + "metrics": { + "durationMs": 3470.65, + "billedDurationMs": 3500, + "memorySizeMB": 128, + "maxMemoryUsedMB": 89, + }, + "status": "success", + }, + } + ), + "aws": { + "awslogs": { + "logGroup": "/aws/lambda/post-coupon-prod-us", + "logStream": "2019/09/25/[$LATEST]d6c10ebbd9cb48dba94a7d9b874b49bb", + "owner": "172597598159", + }, + "function_version": "$LATEST", + "invoked_function_arn": "arn:aws:lambda:us-east-1:172597598159:function:collect_logs_datadog_demo", + }, + "lambda": { + "arn": "arn:aws:lambda:us-east-1:172597598159:function:post-coupon-prod-us" + }, + "timestamp": 10000, + } + + generated_metrics = generate_enhanced_lambda_metrics(logs_input, tags_cache) + verify_as_json(generated_metrics) + def test_generate_enhanced_lambda_metrics(self): tags_cache = LambdaTagsCache("") tags_cache.get = MagicMock(return_value=[])