From 433ca97e62df23a3bc73e1233f89d2f1923bae9a Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 3 Oct 2024 17:36:57 +0200 Subject: [PATCH 01/10] WIP: fix some leaks in add aspect Signed-off-by: Juanjo Alvarez --- .../Aspects/AspectOperatorAdd.cpp | 23 +++++++++++-------- .../_iast/_taint_tracking/Utils/StringUtils.h | 18 +++++++++++---- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp index 0d613375aca..280900e122e 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp @@ -22,16 +22,18 @@ add_aspect(PyObject* result_o, const size_t len_text_to_add{ get_pyobject_size(text_to_add) }; if (len_text_to_add == 0 and len_candidate_text > 0) { + Py_DECREF(result_o); return candidate_text; } if (len_text_to_add > 0 and len_candidate_text == 0 and text_to_add == result_o) { + Py_DECREF(result_o); return text_to_add; } const auto& to_candidate_text = get_tainted_object(candidate_text, tx_taint_map); if (to_candidate_text and to_candidate_text->get_ranges().size() >= TaintedObject::TAINT_RANGE_LIMIT) { const auto& res_new_id = new_pyobject_id(result_o); - Py_DecRef(result_o); + Py_DECREF(result_o); // If left side is already at the maximum taint ranges, we just reuse its // ranges, we don't need to look at left side. set_tainted_object(res_new_id, to_candidate_text, tx_taint_map); @@ -44,15 +46,15 @@ add_aspect(PyObject* result_o, } if (!to_text_to_add) { const auto& res_new_id = new_pyobject_id(result_o); - Py_DecRef(result_o); + Py_DECREF(result_o); set_tainted_object(res_new_id, to_candidate_text, tx_taint_map); return res_new_id; } - auto tainted = initializer->allocate_tainted_object_copy(to_candidate_text); + // JJJ LEAK HERE + const auto tainted = initializer->allocate_tainted_object_copy(to_candidate_text); tainted->add_ranges_shifted(to_text_to_add, static_cast(len_candidate_text)); set_tainted_object(result_o, tainted, tx_taint_map); - return result_o; } @@ -84,6 +86,9 @@ api_add_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs) // PyNumber_Add actually works for any type! result_o = PyNumber_Add(candidate_text, text_to_add); + if (result_o == nullptr) { + return nullptr; + } TRY_CATCH_ASPECT("add_aspect", return result_o, , { const auto tx_map = Initializer::get_tainting_map(); @@ -116,8 +121,6 @@ api_add_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs) PyObject* api_add_inplace_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs) { - PyObject* result_o = nullptr; - if (nargs != 2) { py::set_error(PyExc_ValueError, MSG_ERROR_N_PARAMS); return nullptr; @@ -125,7 +128,10 @@ api_add_inplace_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs) PyObject* candidate_text = args[0]; PyObject* text_to_add = args[1]; - result_o = PyNumber_InPlaceAdd(candidate_text, text_to_add); + PyObject* result_o = PyNumber_InPlaceAdd(candidate_text, text_to_add); + if (result_o == nullptr) { + return nullptr; + } TRY_CATCH_ASPECT("add_inplace_aspect", return result_o, , { const auto tx_map = Initializer::get_tainting_map(); @@ -152,7 +158,6 @@ api_add_inplace_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs) is_notinterned_notfasttainted_unicode(text_to_add)) { return result_o; } - candidate_text = add_aspect(result_o, candidate_text, text_to_add, tx_map); - return candidate_text; + return add_aspect(result_o, candidate_text, text_to_add, tx_map); }); } \ No newline at end of file diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h index 88ecfa0b8fb..71376e44503 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h @@ -52,15 +52,17 @@ is_text(const PyObject* pyptr) }; // Check that it's aligned correctly - if (reinterpret_cast(pyptr) % alignof(PyObject) != 0) return false;; + if (reinterpret_cast(pyptr) % alignof(PyObject) != 0) + return false; + ; // Try to safely access ob_type - if (const PyObject* temp = pyptr;!temp->ob_type) return false; + if (const PyObject* temp = pyptr; !temp->ob_type) + return false; return PyUnicode_Check(pyptr) or PyBytes_Check(pyptr) or PyByteArray_Check(pyptr); } - inline bool is_tainteable(const PyObject* pyptr) { @@ -79,12 +81,18 @@ template bool args_are_text_and_same_type(PyObject* first, PyObject* second, Args... args) { + const auto type_first = PyObject_Type(first); + const auto type_second = PyObject_Type(second); + // Check if both first and second are valid text types and of the same type - if (first == nullptr || second == nullptr || !is_text(first) || !is_text(second) || - PyObject_Type(first) != PyObject_Type(second)) { + if (first == nullptr || second == nullptr || !is_text(first) || !is_text(second) || type_first != type_second) { + Py_XDECREF(type_first); + Py_XDECREF(type_second); return false; } + Py_XDECREF(type_first); + Py_XDECREF(type_second); // Recursively check the rest of the arguments return args_are_text_and_same_type(second, args...); } From 1638b2c2c6a6d6f2acf1bc6a57ee331a96536aaa Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 4 Oct 2024 12:57:49 +0200 Subject: [PATCH 02/10] Add nasty bug in add_aspect Signed-off-by: Juanjo Alvarez --- .../Aspects/AspectOperatorAdd.cpp | 27 ++++++++++++++++--- .../_taint_tracking/tests/test_other.cpp | 17 +++--------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp index 280900e122e..d0f82fdadc3 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp @@ -31,8 +31,13 @@ add_aspect(PyObject* result_o, } const auto& to_candidate_text = get_tainted_object(candidate_text, tx_taint_map); + if (to_candidate_text == nullptr) { + } if (to_candidate_text and to_candidate_text->get_ranges().size() >= TaintedObject::TAINT_RANGE_LIMIT) { const auto& res_new_id = new_pyobject_id(result_o); + if (res_new_id == nullptr) { + return result_o; + } Py_DECREF(result_o); // If left side is already at the maximum taint ranges, we just reuse its // ranges, we don't need to look at left side. @@ -46,16 +51,32 @@ add_aspect(PyObject* result_o, } if (!to_text_to_add) { const auto& res_new_id = new_pyobject_id(result_o); + if (res_new_id == nullptr) { + return result_o; + } Py_DECREF(result_o); set_tainted_object(res_new_id, to_candidate_text, tx_taint_map); return res_new_id; } - // JJJ LEAK HERE + const auto& res_new_id = new_pyobject_id(result_o); + + if (!to_candidate_text) { + const auto tainted = initializer->allocate_tainted_object(); + tainted->add_ranges_shifted(to_text_to_add, static_cast(len_candidate_text)); + set_tainted_object(res_new_id, tainted, tx_taint_map); + Py_DECREF(result_o); + return res_new_id; + } + + // At this point we have both to_candidate_text and to_text_to_add to we add the + // ranges from both to result_o + const auto tainted = initializer->allocate_tainted_object_copy(to_candidate_text); tainted->add_ranges_shifted(to_text_to_add, static_cast(len_candidate_text)); - set_tainted_object(result_o, tainted, tx_taint_map); - return result_o; + set_tainted_object(res_new_id, tainted, tx_taint_map); + Py_DECREF(result_o); + return res_new_id; } /** diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_other.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_other.cpp index 214de1f3366..0a3ccf063bb 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_other.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_other.cpp @@ -1,7 +1,6 @@ -#include #include +#include #include -#include // jjj using PyByteArray_Check = PyEnvCheck; @@ -66,12 +65,7 @@ TEST_F(PyByteArray_Check, WithOtherTypes) if (PY_VERSION_HEX >= 0x03080000) { // PyByteArray_Check is bugged for user-defined classes in Python 3.7 // Test with a user-defined class - PyRun_String( - "class TestClass:\n pass\n\ntest_instance = TestClass()", - Py_file_input, - globals, - locals - ); + PyRun_String("class TestClass:\n pass\n\ntest_instance = TestClass()", Py_file_input, globals, locals); PyObject* user_obj = PyDict_GetItemString(locals, "test_instance"); EXPECT_FALSE(PyByteArray_Check(user_obj)) << "Failed for user-defined class"; } @@ -87,12 +81,7 @@ TEST_F(PyByteArray_Check, WithOtherTypes) Py_DECREF(complex_obj); // Test with Function - PyRun_String( - "def test_function():\n pass", - Py_file_input, - globals, - locals - ); + PyRun_String("def test_function():\n pass", Py_file_input, globals, locals); PyObject* func_obj = PyDict_GetItemString(locals, "test_function"); EXPECT_FALSE(PyByteArray_Check(func_obj)) << "Failed for Function type"; Py_DECREF(globals); From ba9ce378d3ecb5abd15282fdfb36e48f363096d5 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 4 Oct 2024 13:02:37 +0200 Subject: [PATCH 03/10] fix Signed-off-by: Juanjo Alvarez --- .../appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp index d0f82fdadc3..f1c5c80341e 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp @@ -31,8 +31,6 @@ add_aspect(PyObject* result_o, } const auto& to_candidate_text = get_tainted_object(candidate_text, tx_taint_map); - if (to_candidate_text == nullptr) { - } if (to_candidate_text and to_candidate_text->get_ranges().size() >= TaintedObject::TAINT_RANGE_LIMIT) { const auto& res_new_id = new_pyobject_id(result_o); if (res_new_id == nullptr) { From f9e83b1ee60257de6dbccb0042f9ac46bf40a36a Mon Sep 17 00:00:00 2001 From: Alberto Vara Date: Fri, 4 Oct 2024 13:09:19 +0200 Subject: [PATCH 04/10] improve leak functions --- .gitignore | 13 ++- .../_taint_tracking/tests/test_add_aspect.cpp | 81 +++++++++++++++++++ .../_taint_tracking/tests/test_common.hpp | 23 ++++++ .../_taint_tracking/tests/test_helpers.cpp | 1 - .../tests/test_index_aspect.cpp | 1 - 5 files changed, 116 insertions(+), 3 deletions(-) create mode 100644 ddtrace/appsec/_iast/_taint_tracking/tests/test_add_aspect.cpp diff --git a/.gitignore b/.gitignore index ce0f64a9433..f6b1906f30c 100644 --- a/.gitignore +++ b/.gitignore @@ -141,10 +141,21 @@ ddtrace/_version.py artifacts/ # IAST cpp +ddtrace/appsec/_iast/_taint_tracking/cmake_install.cmake +ddtrace/appsec/_iast/_taint_tracking/CMakeCache.txt +ddtrace/appsec/_iast/_taint_tracking/Makefile ddtrace/appsec/_iast/_taint_tracking/cmake-build-debug/* ddtrace/appsec/_iast/_taint_tracking/_deps/* ddtrace/appsec/_iast/_taint_tracking/CMakeFiles/* - +ddtrace/appsec/_iast/_taint_tracking/tests/CMakeFiles/* +ddtrace/appsec/_iast/_taint_tracking/tests/cmake_install.cmake +ddtrace/appsec/_iast/_taint_tracking/tests/CMakeCache.txt +ddtrace/appsec/_iast/_taint_tracking/tests/Makefile +ddtrace/appsec/_iast/_taint_tracking/tests/CTestTestfile.cmake +ddtrace/appsec/_iast/_taint_tracking/tests/native_tests* +ddtrace/appsec/_iast/_taint_tracking/_vendor/pybind11/Makefile +ddtrace/appsec/_iast/_taint_tracking/_vendor/pybind11/CMakeFiles/* +ddtrace/appsec/_iast/_taint_tracking/_vendor/pybind11/cmake_install.cmake # CircleCI generated config .circleci/config.gen.yml diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_add_aspect.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_add_aspect.cpp new file mode 100644 index 00000000000..be3f34c4b1e --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_add_aspect.cpp @@ -0,0 +1,81 @@ +#include "Aspects/Helpers.h" + +#include +#include +#include + +using AspectAddCheck = PyEnvWithContext; + + +TEST_F(AspectAddCheck, check_api_add_aspect_strings_candidate_text_empty) +{ + PyObject* candidate_text = this->StringToPyObjectStr(""); + PyObject* text_to_add = this->StringToPyObjectStr("def"); + PyObject* args_array[2]; + args_array[0] = candidate_text; + args_array[1] = text_to_add; + auto result = api_add_aspect(nullptr, args_array, 2); + EXPECT_FALSE(has_pyerr()); + + std::string result_string = this->PyObjectStrToString(result); + + EXPECT_EQ(result_string, "def"); + Py_DecRef(candidate_text); + Py_DecRef(text_to_add); + Py_DecRef(result); +} + +TEST_F(AspectAddCheck, check_api_add_aspect_strings_text_to_add_empty) +{ +PyObject* candidate_text = this->StringToPyObjectStr("abc"); +PyObject* text_to_add = this->StringToPyObjectStr(""); +PyObject* args_array[2]; +args_array[0] = candidate_text; +args_array[1] = text_to_add; +auto result = api_add_aspect(nullptr, args_array, 2); +EXPECT_FALSE(has_pyerr()); + +std::string result_string = this->PyObjectStrToString(result); + +EXPECT_EQ(result_string, "abc"); +Py_DecRef(candidate_text); +Py_DecRef(text_to_add); +Py_DecRef(result); +} + + +TEST_F(AspectAddCheck, check_api_add_aspect_strings) +{ + PyObject* candidate_text = this->StringToPyObjectStr("abc"); + PyObject* text_to_add = this->StringToPyObjectStr("def"); + PyObject* args_array[2]; + args_array[0] = candidate_text; + args_array[1] = text_to_add; + auto result = api_add_aspect(nullptr, args_array, 2); + EXPECT_FALSE(has_pyerr()); + + std::string result_string = this->PyObjectStrToString(result); + + EXPECT_EQ(result_string, "abcdef"); + Py_DecRef(candidate_text); + Py_DecRef(text_to_add); + Py_DecRef(result); +} + +TEST_F(AspectAddCheck, check_api_add_aspect_bytes) +{ + PyObject* candidate_text = this->StringToPyObjectBytes("abc"); + PyObject* text_to_add = this->StringToPyObjectBytes("def"); + PyObject* args_array[2]; + args_array[0] = candidate_text; + args_array[1] = text_to_add; + auto result = api_add_aspect(nullptr, args_array, 2); + EXPECT_FALSE(has_pyerr()); + + std::string result_string = this->PyObjectBytesToString(result); + + EXPECT_EQ(result_string, "abcdef"); + Py_DecRef(candidate_text); + Py_DecRef(text_to_add); + Py_DecRef(result); +} diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp index 81af3566b5a..173fdfcfd51 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp @@ -29,4 +29,27 @@ class PyEnvWithContext : public ::testing::Test initializer.reset(); py::finalize_interpreter(); } +public: + PyObject* StringToPyObjectStr(const string& ob) + { + return PyUnicode_FromString(ob.c_str()); + } + string PyObjectStrToString(PyObject* ob) + { + PyObject* utf8_str = PyUnicode_AsEncodedString(ob, "utf-8", "strict"); + const char* res_data = PyBytes_AsString(utf8_str); + std::string res_string(res_data); + Py_DecRef(utf8_str); + return res_string; + } + PyObject* StringToPyObjectBytes(const string& ob) + { + return PyBytes_FromString(ob.c_str()); + } + string PyObjectBytesToString(PyObject* ob) + { + const char* res_data = PyBytes_AsString(ob); + std::string res_string(res_data); + return res_string; + } }; diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp index 1078e23a1e0..825a03ba787 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -1,5 +1,4 @@ #include -#include #include #include diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_index_aspect.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_index_aspect.cpp index 444325d8fd4..2961cc80ea8 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_index_aspect.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_index_aspect.cpp @@ -1,7 +1,6 @@ #include "Aspects/Helpers.h" #include -#include #include using AspectIndexCheck = PyEnvWithContext; From b4f6d4e187c8a0b8ad597753759ef5c1a0a24df3 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 4 Oct 2024 13:14:00 +0200 Subject: [PATCH 05/10] test fix Signed-off-by: Juanjo Alvarez --- .../_iast/_taint_tracking/Utils/StringUtils.cpp | 12 ++++++++++-- .../appsec/_iast/_taint_tracking/Utils/StringUtils.h | 6 +++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp index 9a946c4e622..2df5ffa5e28 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp @@ -70,10 +70,12 @@ new_pyobject_id(PyObject* tainted_object) return nullptr; // Check that it's aligned correctly - if (reinterpret_cast(tainted_object) % alignof(PyObject) != 0) return tainted_object; + if (reinterpret_cast(tainted_object) % alignof(PyObject) != 0) + return tainted_object; // Try to safely access ob_type - if (const PyObject* temp = tainted_object;!temp->ob_type) return tainted_object; + if (const PyObject* temp = tainted_object; !temp->ob_type) + return tainted_object; py::gil_scoped_acquire acquire; @@ -110,6 +112,9 @@ new_pyobject_id(PyObject* tainted_object) const auto res = PyObject_CallFunctionObjArgs(bytes_join_ptr.ptr(), val, NULL); Py_XDECREF(val); Py_XDECREF(empty_bytes); + if (res == nullptr) { + return tainted_object; + } return res; } @@ -136,6 +141,9 @@ new_pyobject_id(PyObject* tainted_object) Py_XDECREF(val); Py_XDECREF(empty_bytes); Py_XDECREF(empty_bytearray); + if (res == nullptr) { + return tainted_object; + } return res; } return tainted_object; diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h index 71376e44503..d2da5dcd821 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h @@ -81,11 +81,15 @@ template bool args_are_text_and_same_type(PyObject* first, PyObject* second, Args... args) { + if (first == nullptr || second == nullptr) { + return false; + } + const auto type_first = PyObject_Type(first); const auto type_second = PyObject_Type(second); // Check if both first and second are valid text types and of the same type - if (first == nullptr || second == nullptr || !is_text(first) || !is_text(second) || type_first != type_second) { + if (!is_text(first) || !is_text(second) || type_first != type_second) { Py_XDECREF(type_first); Py_XDECREF(type_second); return false; From 950f40b8a5e374b2b7e9853b49635aacc9c09db5 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 4 Oct 2024 16:28:43 +0200 Subject: [PATCH 06/10] Move TaintRangePtr to share_ptr Signed-off-by: Juanjo Alvarez --- .../Aspects/AspectOperatorAdd.cpp | 17 +++--------- .../Initializer/Initializer.cpp | 27 +++---------------- .../_taint_tracking/Initializer/Initializer.h | 2 -- .../TaintTracking/TaintRange.cpp | 8 ++---- .../TaintTracking/TaintRange.h | 4 +-- .../TaintTracking/TaintedObject.cpp | 23 ---------------- .../TaintTracking/TaintedObject.h | 9 +------ 7 files changed, 12 insertions(+), 78 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp index f1c5c80341e..e08d3135332 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp @@ -33,9 +33,6 @@ add_aspect(PyObject* result_o, const auto& to_candidate_text = get_tainted_object(candidate_text, tx_taint_map); if (to_candidate_text and to_candidate_text->get_ranges().size() >= TaintedObject::TAINT_RANGE_LIMIT) { const auto& res_new_id = new_pyobject_id(result_o); - if (res_new_id == nullptr) { - return result_o; - } Py_DECREF(result_o); // If left side is already at the maximum taint ranges, we just reuse its // ranges, we don't need to look at left side. @@ -47,33 +44,27 @@ add_aspect(PyObject* result_o, if (!to_candidate_text and !to_text_to_add) { return result_o; } + + const auto& res_new_id = new_pyobject_id(result_o); + Py_DECREF(result_o); + if (!to_text_to_add) { - const auto& res_new_id = new_pyobject_id(result_o); - if (res_new_id == nullptr) { - return result_o; - } - Py_DECREF(result_o); set_tainted_object(res_new_id, to_candidate_text, tx_taint_map); return res_new_id; } - const auto& res_new_id = new_pyobject_id(result_o); - if (!to_candidate_text) { const auto tainted = initializer->allocate_tainted_object(); tainted->add_ranges_shifted(to_text_to_add, static_cast(len_candidate_text)); set_tainted_object(res_new_id, tainted, tx_taint_map); - Py_DECREF(result_o); return res_new_id; } // At this point we have both to_candidate_text and to_text_to_add to we add the // ranges from both to result_o - const auto tainted = initializer->allocate_tainted_object_copy(to_candidate_text); tainted->add_ranges_shifted(to_text_to_add, static_cast(len_candidate_text)); set_tainted_object(res_new_id, tainted, tx_taint_map); - Py_DECREF(result_o); return res_new_id; } diff --git a/ddtrace/appsec/_iast/_taint_tracking/Initializer/Initializer.cpp b/ddtrace/appsec/_iast/_taint_tracking/Initializer/Initializer.cpp index 73b934366e2..657d8a92e10 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Initializer/Initializer.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Initializer/Initializer.cpp @@ -14,7 +14,7 @@ Initializer::Initializer() { // Fill the taintedobjects stack for (int i = 0; i < TAINTEDOBJECTS_STACK_SIZE; i++) { - available_taintedobjects_stack.push(new TaintedObject()); + available_taintedobjects_stack.push(make_shared()); } // Fill the ranges stack @@ -41,9 +41,6 @@ Initializer::clear_tainting_map(const TaintRangeMapTypePtr& tx_map) return; } std::lock_guard lock(active_map_addreses_mutex); - for (const auto& [fst, snd] : *tx_map) { - snd.second->decref(); - } tx_map->clear(); active_map_addreses.erase(tx_map.get()); } @@ -114,12 +111,12 @@ TaintedObjectPtr Initializer::allocate_tainted_object() { if (!available_taintedobjects_stack.empty()) { - const auto& toptr = available_taintedobjects_stack.top(); + const auto toptr = available_taintedobjects_stack.top(); available_taintedobjects_stack.pop(); return toptr; } // Stack is empty, create new object - return new TaintedObject(); + return make_shared(); } TaintedObjectPtr @@ -147,24 +144,6 @@ Initializer::allocate_tainted_object_copy(const TaintedObjectPtr& from) return allocate_ranges_into_taint_object_copy(from->ranges_); } -void -Initializer::release_tainted_object(TaintedObjectPtr tobj) -{ - if (!tobj) { - return; - } - - tobj->reset(); - if (available_taintedobjects_stack.size() < TAINTEDOBJECTS_STACK_SIZE) { - available_taintedobjects_stack.push(tobj); - return; - } - - // Stack full, just delete the object (but to a reset before so ranges are - // reused or freed) - delete tobj; -} - TaintRangePtr Initializer::allocate_taint_range(const RANGE_START start, const RANGE_LENGTH length, const Source& origin) { diff --git a/ddtrace/appsec/_iast/_taint_tracking/Initializer/Initializer.h b/ddtrace/appsec/_iast/_taint_tracking/Initializer/Initializer.h index d453ce5527f..6d7fbf36499 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Initializer/Initializer.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Initializer/Initializer.h @@ -140,8 +140,6 @@ class Initializer */ TaintedObjectPtr allocate_tainted_object_copy(const TaintedObjectPtr& from); - void release_tainted_object(TaintedObjectPtr tobj); - // FIXME: these should be static functions of TaintRange // IMPORTANT: if the returned object is not assigned to the map, you have // responsibility of calling release_taint_range on it or you'll have a leak. diff --git a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.cpp b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.cpp index a15274cd5e0..ed50046cecf 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.cpp @@ -181,9 +181,8 @@ set_ranges(PyObject* str, const TaintRangeRefs& ranges, const TaintRangeMapTypeP auto new_tainted_object = initializer->allocate_ranges_into_taint_object(ranges); set_fast_tainted_if_notinterned_unicode(str); - new_tainted_object->incref(); if (it != tx_map->end()) { - it->second.second->decref(); + it->second.second.reset(); it->second = std::make_pair(get_internal_hash(str), new_tainted_object); return true; } @@ -312,7 +311,6 @@ get_tainted_object(PyObject* str, const TaintRangeMapTypePtr& tx_map) } if (get_internal_hash(str) != it->second.first) { - it->second.second->decref(); tx_map->erase(it); return nullptr; } @@ -368,15 +366,13 @@ set_tainted_object(PyObject* str, TaintedObjectPtr tainted_object, const TaintRa // If the tainted object is different, we need to decref the previous one // and incref the new one. But if it's the same object, we can avoid both // operations, since they would be redundant. - it->second.second->decref(); - tainted_object->incref(); + it->second.second.reset(); it->second = std::make_pair(get_internal_hash(str), tainted_object); } // Update the hash, because for bytearrays it could have changed after the extend operation it->second.first = get_internal_hash(str); return; } - tainted_object->incref(); tx_map->insert({ obj_id, std::make_pair(get_internal_hash(str), tainted_object) }); } diff --git a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.h b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.h index 6d6e1a9a279..efb817a3359 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.h +++ b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintRange.h @@ -17,7 +17,7 @@ namespace py = pybind11; class TaintedObject; // Alias -using TaintedObjectPtr = TaintedObject*; +using TaintedObjectPtr = shared_ptr; #ifdef NDEBUG // Decide wether to use abseil @@ -134,7 +134,7 @@ api_is_unicode_and_not_fast_tainted(const py::handle& str) return is_notinterned_notfasttainted_unicode(str.ptr()); } -TaintedObject* +TaintedObjectPtr get_tainted_object(PyObject* str, const TaintRangeMapTypePtr& tx_taint_map); Py_hash_t diff --git a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintedObject.cpp b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintedObject.cpp index 3a5aa2aa277..3b00a5d28e8 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintedObject.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintedObject.cpp @@ -140,34 +140,11 @@ void TaintedObject::reset() { move_ranges_to_stack(); - rc_ = 0; if (initializer) { ranges_.reserve(RANGES_INITIAL_RESERVE); } } -void -TaintedObject::incref() -{ - rc_++; -} - -void -TaintedObject::decref() -{ - if (--rc_ <= 0) { - release(); - } -} - -void -TaintedObject::release() -{ - // If rc_ is negative, there is a bug. - // assert(rc_ == 0); - initializer->release_tainted_object(this); -} - void pyexport_taintedobject(const py::module& m) { diff --git a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintedObject.h b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintedObject.h index a4198424a7c..88198fabcea 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintedObject.h +++ b/ddtrace/appsec/_iast/_taint_tracking/TaintTracking/TaintedObject.h @@ -8,7 +8,6 @@ class TaintedObject private: TaintRangeRefs ranges_; - size_t rc_{}; public: constexpr static int TAINT_RANGE_LIMIT = 100; @@ -36,7 +35,7 @@ class TaintedObject [[nodiscard]] TaintRangeRefs get_ranges_copy() const { return ranges_; } - void add_ranges_shifted(TaintedObject* tainted_object, + void add_ranges_shifted(TaintedObjectPtr tainted_object, RANGE_START offset, RANGE_LENGTH max_length = -1, RANGE_START orig_offset = -1); @@ -53,12 +52,6 @@ class TaintedObject void move_ranges_to_stack(); void reset(); - - void incref(); - - void decref(); - - void release(); }; void From b21cd21170e81667b0de5103945257a96e8a3e22 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 4 Oct 2024 17:09:20 +0200 Subject: [PATCH 07/10] Fix leak in aspect join Signed-off-by: Juanjo Alvarez --- .../_iast/_taint_tracking/Aspects/AspectJoin.cpp | 13 +++++++------ scripts/iast/mod_leak_functions.py | 4 +++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectJoin.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectJoin.cpp index 60d4aa3dea1..476246e2a84 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectJoin.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectJoin.cpp @@ -55,7 +55,7 @@ aspect_join_str(PyObject* sep, PyObject* new_result{ new_pyobject_id(result) }; set_tainted_object(new_result, result_to, tx_taint_map); - Py_DecRef(result); + Py_DECREF(result); return new_result; } @@ -134,7 +134,7 @@ aspect_join(PyObject* sep, PyObject* result, PyObject* iterable_elements, const PyObject* new_result{ new_pyobject_id(result) }; set_tainted_object(new_result, result_to, tx_taint_map); - Py_DecRef(result); + Py_DECREF(result); return new_result; } @@ -158,9 +158,10 @@ api_join_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs) PyObject* list_aux = PyList_New(0); while ((item = PyIter_Next(iterator))) { PyList_Append(list_aux, item); + Py_DECREF(item); } arg0 = list_aux; - Py_DecRef(iterator); + Py_DECREF(iterator); decref_arg0 = true; } } @@ -181,7 +182,7 @@ api_join_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs) if (has_pyerr()) { if (decref_arg0) { - Py_DecRef(arg0); + Py_DECREF(arg0); } return nullptr; } @@ -190,13 +191,13 @@ api_join_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs) if (not ctx_map or ctx_map->empty() or get_pyobject_size(result) == 0) { // Empty result cannot have taint ranges if (decref_arg0) { - Py_DecRef(arg0); + Py_DECREF(arg0); } return result; } auto res = aspect_join(sep, result, arg0, ctx_map); if (decref_arg0) { - Py_DecRef(arg0); + Py_DECREF(arg0); } return res; }); diff --git a/scripts/iast/mod_leak_functions.py b/scripts/iast/mod_leak_functions.py index e43db154dbf..e55480a1d36 100644 --- a/scripts/iast/mod_leak_functions.py +++ b/scripts/iast/mod_leak_functions.py @@ -24,7 +24,9 @@ def test_doit(): string3 = string1 + string2 # 2 propagation ranges: hiroot1234 string4 = "-".join([string3, string3, string3]) # 6 propagation ranges: hiroot1234-hiroot1234-hiroot1234 - string5 = string4[0:20] # 1 propagation range: hiroot1234-hiroot123 + string4_2 = string1 + string4_2 += " " + " ".join(string_ for string_ in [string4, string4, string4]) + string5 = string4_2[0:20] # 1 propagation range: hiroot1234-hiroot123 string6 = string5.title() # 1 propagation range: Hiroot1234-Hiroot123 string7 = string6.upper() # 1 propagation range: HIROOT1234-HIROOT123 string8 = "%s_notainted" % string7 # 1 propagation range: HIROOT1234-HIROOT123_notainted From 4fbce49e35f796223160e3eb955c51fc7625a1ff Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 4 Oct 2024 17:26:56 +0200 Subject: [PATCH 08/10] fmt Signed-off-by: Juanjo Alvarez --- .../_taint_tracking/tests/test_add_aspect.cpp | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_add_aspect.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_add_aspect.cpp index be3f34c4b1e..85ac7dc3d95 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_add_aspect.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_add_aspect.cpp @@ -6,11 +6,10 @@ using AspectAddCheck = PyEnvWithContext; - TEST_F(AspectAddCheck, check_api_add_aspect_strings_candidate_text_empty) { PyObject* candidate_text = this->StringToPyObjectStr(""); - PyObject* text_to_add = this->StringToPyObjectStr("def"); + PyObject* text_to_add = this->StringToPyObjectStr("def"); PyObject* args_array[2]; args_array[0] = candidate_text; args_array[1] = text_to_add; @@ -27,27 +26,26 @@ TEST_F(AspectAddCheck, check_api_add_aspect_strings_candidate_text_empty) TEST_F(AspectAddCheck, check_api_add_aspect_strings_text_to_add_empty) { -PyObject* candidate_text = this->StringToPyObjectStr("abc"); -PyObject* text_to_add = this->StringToPyObjectStr(""); -PyObject* args_array[2]; -args_array[0] = candidate_text; -args_array[1] = text_to_add; -auto result = api_add_aspect(nullptr, args_array, 2); -EXPECT_FALSE(has_pyerr()); + PyObject* candidate_text = this->StringToPyObjectStr("abc"); + PyObject* text_to_add = this->StringToPyObjectStr(""); + PyObject* args_array[2]; + args_array[0] = candidate_text; + args_array[1] = text_to_add; + auto result = api_add_aspect(nullptr, args_array, 2); + EXPECT_FALSE(has_pyerr()); -std::string result_string = this->PyObjectStrToString(result); + std::string result_string = this->PyObjectStrToString(result); -EXPECT_EQ(result_string, "abc"); -Py_DecRef(candidate_text); -Py_DecRef(text_to_add); -Py_DecRef(result); + EXPECT_EQ(result_string, "abc"); + Py_DecRef(candidate_text); + Py_DecRef(text_to_add); + Py_DecRef(result); } - TEST_F(AspectAddCheck, check_api_add_aspect_strings) { PyObject* candidate_text = this->StringToPyObjectStr("abc"); - PyObject* text_to_add = this->StringToPyObjectStr("def"); + PyObject* text_to_add = this->StringToPyObjectStr("def"); PyObject* args_array[2]; args_array[0] = candidate_text; args_array[1] = text_to_add; @@ -65,7 +63,7 @@ TEST_F(AspectAddCheck, check_api_add_aspect_strings) TEST_F(AspectAddCheck, check_api_add_aspect_bytes) { PyObject* candidate_text = this->StringToPyObjectBytes("abc"); - PyObject* text_to_add = this->StringToPyObjectBytes("def"); + PyObject* text_to_add = this->StringToPyObjectBytes("def"); PyObject* args_array[2]; args_array[0] = candidate_text; args_array[1] = text_to_add; From ccbb60bb86e1f77fa42e1b0bfc9801b18e25750c Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 4 Oct 2024 17:30:26 +0200 Subject: [PATCH 09/10] fmt Signed-off-by: Juanjo Alvarez --- .../_iast/_taint_tracking/tests/test_common.hpp | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp index 173fdfcfd51..b7338e3d683 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp @@ -29,12 +29,10 @@ class PyEnvWithContext : public ::testing::Test initializer.reset(); py::finalize_interpreter(); } -public: - PyObject* StringToPyObjectStr(const string& ob) - { - return PyUnicode_FromString(ob.c_str()); - } - string PyObjectStrToString(PyObject* ob) + + public: + PyObject* StringToPyObjectStr(const string& ob) { return PyUnicode_FromString(ob.c_str()); } + string PyObjectStrToString(PyObject* ob) { PyObject* utf8_str = PyUnicode_AsEncodedString(ob, "utf-8", "strict"); const char* res_data = PyBytes_AsString(utf8_str); @@ -42,11 +40,8 @@ class PyEnvWithContext : public ::testing::Test Py_DecRef(utf8_str); return res_string; } - PyObject* StringToPyObjectBytes(const string& ob) - { - return PyBytes_FromString(ob.c_str()); - } - string PyObjectBytesToString(PyObject* ob) + PyObject* StringToPyObjectBytes(const string& ob) { return PyBytes_FromString(ob.c_str()); } + string PyObjectBytesToString(PyObject* ob) { const char* res_data = PyBytes_AsString(ob); std::string res_string(res_data); From d0eda4eb31e0fd71468cca340042abb39f2ced88 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 4 Oct 2024 20:05:48 +0200 Subject: [PATCH 10/10] fix Signed-off-by: Juanjo Alvarez --- .../Aspects/AspectOperatorAdd.cpp | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp index e08d3135332..a92a1436f95 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp @@ -21,13 +21,10 @@ add_aspect(PyObject* result_o, const size_t len_candidate_text{ get_pyobject_size(candidate_text) }; const size_t len_text_to_add{ get_pyobject_size(text_to_add) }; - if (len_text_to_add == 0 and len_candidate_text > 0) { - Py_DECREF(result_o); - return candidate_text; - } - if (len_text_to_add > 0 and len_candidate_text == 0 and text_to_add == result_o) { - Py_DECREF(result_o); - return text_to_add; + // Appended from or with nothing, ranges didn't change + if ((len_text_to_add == 0 and len_candidate_text > 0) || + (len_text_to_add > 0 and len_candidate_text == 0 and text_to_add == result_o)) { + return result_o; } const auto& to_candidate_text = get_tainted_object(candidate_text, tx_taint_map); @@ -44,11 +41,9 @@ add_aspect(PyObject* result_o, if (!to_candidate_text and !to_text_to_add) { return result_o; } - - const auto& res_new_id = new_pyobject_id(result_o); - Py_DECREF(result_o); - if (!to_text_to_add) { + const auto& res_new_id = new_pyobject_id(result_o); + Py_DECREF(result_o); set_tainted_object(res_new_id, to_candidate_text, tx_taint_map); return res_new_id; } @@ -56,16 +51,16 @@ add_aspect(PyObject* result_o, if (!to_candidate_text) { const auto tainted = initializer->allocate_tainted_object(); tainted->add_ranges_shifted(to_text_to_add, static_cast(len_candidate_text)); - set_tainted_object(res_new_id, tainted, tx_taint_map); - return res_new_id; + set_tainted_object(result_o, tainted, tx_taint_map); } // At this point we have both to_candidate_text and to_text_to_add to we add the // ranges from both to result_o const auto tainted = initializer->allocate_tainted_object_copy(to_candidate_text); tainted->add_ranges_shifted(to_text_to_add, static_cast(len_candidate_text)); - set_tainted_object(res_new_id, tainted, tx_taint_map); - return res_new_id; + set_tainted_object(result_o, tainted, tx_taint_map); + + return result_o; } /**