From 1e180f6d76d6d0ea3bd3602d24bbbcf34c1cb6e5 Mon Sep 17 00:00:00 2001 From: Pawel Srokosz Date: Tue, 16 Apr 2019 14:47:40 +0200 Subject: [PATCH] 2.4.0: Various changes --- .gitignore | 4 +- .gitlab-ci.yml | 19 +++ docs/Makefile | 19 +++ docs/make.bat | 35 +++++ docs/source/conf.py | 57 +++++++ docs/source/getexample.rst | 33 ++++ docs/source/index.rst | 30 ++++ docs/source/mwdblib.rst | 7 + docs/source/mwdbsecondary.rst | 11 ++ docs/source/mwdbtypes.rst | 13 ++ setup.py | 2 +- src/__init__.py | 13 ++ src/api.py | 51 +++++- src/blob.py | 26 +++- src/comment.py | 46 ++++++ src/config.py | 44 ++++++ src/core.py | 282 +++++++++++++++++++++++++--------- src/file.py | 37 ++++- src/object.py | 131 ++++++++++++---- src/share.py | 102 ++++++++++++ tests/__init__.py | 0 tests/e2etest_mwdblib.py | 94 ++++++++++++ tests/request_counter.py | 28 ++++ tests/test_mwdblib.py | 14 ++ 24 files changed, 983 insertions(+), 115 deletions(-) create mode 100644 .gitlab-ci.yml create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/conf.py create mode 100644 docs/source/getexample.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/mwdblib.rst create mode 100644 docs/source/mwdbsecondary.rst create mode 100644 docs/source/mwdbtypes.rst create mode 100644 src/comment.py create mode 100644 src/share.py create mode 100644 tests/__init__.py create mode 100644 tests/e2etest_mwdblib.py create mode 100644 tests/request_counter.py create mode 100644 tests/test_mwdblib.py diff --git a/.gitignore b/.gitignore index 0ea1815..9694fd3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ .idea *.pyc - build/ dist/ *egg-info/ +.vscode/ +venv/ +docs/build/ \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..56a8c3f --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,19 @@ +image: python:3.6 +test_style: + script: + - pip3 install flake8 + - flake8 . 
--exclude=venv --max-line-length 120 # imo 120 is way too much + +image: python:3.6 +test_code: + script: + - pip3 install -r requirements.txt + - pip3 install pytest + - pytest . + +image: python:3.6 +test_e2e: + script: + - pip3 install -r requirements.txt + - pip3 install pytest + - pytest tests/e2etest_mwdblib.py diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..69fe55e --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..543c6b1 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..435f39c --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,57 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + + +# -- Project information ----------------------------------------------------- + +project = 'mwdblib' +copyright = '2019, CERT Polska' +author = 'CERT Polska' + +# The full version, including alpha/beta/rc tags +release = '2.4.0' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.viewcode" +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. 
+exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinxdoc' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/docs/source/getexample.rst b/docs/source/getexample.rst new file mode 100644 index 0000000..c1cfd84 --- /dev/null +++ b/docs/source/getexample.rst @@ -0,0 +1,33 @@ +Getting data automatically from Malwarecage +=========================================== + +Looking for recently uploaded files and retrieving them if file type contains "PE32": + +.. code-block:: python + + import itertools + import time + + from mwdblib import Malwarecage + + mwdb = Malwarecage(api_key="") + + def report_new_sample(sample): + print("Found new sample {} ({})".format(sample.name, sample.sha256)) + if "PE32" in sample.type: + with open(sample.id, "wb") as f: + f.write(sample.download()) + print("[+] PE32 downloaded successfully!") + + last_sample = None + while True: + top_sample = next(mwdb.recent_files()).id + + if last_sample is not None: + for sample in itertools.takewhile(lambda s: s.id != last_sample, + mwdb.recent_files()): + report_new_sample(sample) + + last_sample = top_sample + # Wait 10 minutes before next try + time.sleep(600) diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..fbcea8b --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,30 @@ +.. mwdblib documentation master file, created by + sphinx-quickstart on Mon Apr 15 15:48:25 2019. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to mwdblib's documentation! 
+=================================== + +.. toctree:: + :maxdepth: 2 + :caption: API: + + mwdblib + mwdbtypes + mwdbsecondary + + +.. toctree:: + :maxdepth: 2 + :caption: Usage examples: + + getexample + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/mwdblib.rst b/docs/source/mwdblib.rst new file mode 100644 index 0000000..00c2b1a --- /dev/null +++ b/docs/source/mwdblib.rst @@ -0,0 +1,7 @@ +Malwarecage core interface +================================== + +.. automodule:: mwdblib + +.. autoclass:: Malwarecage + :members: diff --git a/docs/source/mwdbsecondary.rst b/docs/source/mwdbsecondary.rst new file mode 100644 index 0000000..450f818 --- /dev/null +++ b/docs/source/mwdbsecondary.rst @@ -0,0 +1,11 @@ +Secondary objects +================================== + +.. automodule:: mwdblib + +.. autoclass:: mwdblib.comment.MalwarecageComment + :members: +.. autoclass:: mwdblib.share.MalwarecageShare + :members: +.. autoclass:: mwdblib.share.MalwarecageShareReason + :members: diff --git a/docs/source/mwdbtypes.rst b/docs/source/mwdbtypes.rst new file mode 100644 index 0000000..04636dc --- /dev/null +++ b/docs/source/mwdbtypes.rst @@ -0,0 +1,13 @@ +Object types +================================== + +.. automodule:: mwdblib + +.. autoclass:: MalwarecageObject + :members: +.. autoclass:: MalwarecageFile + :members: +.. autoclass:: MalwarecageConfig + :members: +.. 
autoclass:: MalwarecageBlob + :members: diff --git a/setup.py b/setup.py index 2e558fa..40f8240 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from distutils.core import setup setup(name="mwdblib", - version="2.3.0", + version="2.4.0", description="malwaredb API bindings for Python", author="psrok1", package_dir={'mwdblib': 'src'}, diff --git a/src/__init__.py b/src/__init__.py index bc0484d..0ae2801 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,2 +1,15 @@ from .core import Malwarecage from .api import MalwarecageAPI +from .file import MalwarecageFile +from .object import MalwarecageObject +from .config import MalwarecageConfig +from .blob import MalwarecageBlob + +__all__ = [ + 'Malwarecage', + 'MalwarecageAPI', + 'MalwarecageFile', + 'MalwarecageObject', + 'MalwarecageConfig', + 'MalwarecageBlob', +] diff --git a/src/api.py b/src/api.py index d7882a9..8653e69 100644 --- a/src/api.py +++ b/src/api.py @@ -1,5 +1,6 @@ import base64 import json +import warnings try: from urlparse import urljoin @@ -15,16 +16,31 @@ class MalwarecageAPI(object): - def __init__(self, api_url=API_URL, api_key=None): + def __init__(self, api_url=API_URL, api_key=None, verify_ssl=False): self.api_url = api_url self.api_key = None self.session = requests.Session() self.set_api_key(api_key) + self.username = None + self.password = None + self.verify_ssl = verify_ssl def set_api_key(self, api_key): self.api_key = api_key self.session.headers.update({'Authorization': 'Bearer {}'.format(self.api_key)}) + def login(self, username, password): + warnings.warn("Password-authenticated sessions are short lived, so password needs to be stored " + "in MalwarecageAPI object. 
Ask Malwarecage instance administrator for an API key " + "(send e-mail to info@cert.pl if you use mwdb.cert.pl)") + result = self.post("auth/login", json={ + "login": username, + "password": password + }, noauth=True) + self.username = username + self.password = password + self.set_api_key(result["token"]) + def logged_user(self): if self.api_key is None: return None @@ -41,10 +57,35 @@ def request(self, method, url, noauth=False, raw=False, *args, **kwargs): # Set method name and request URL url = urljoin(self.api_url, url) # Set default kwargs - kwargs["verify"] = False - kwargs["json"] = kwargs.get("json", True) - response = self.session.request(method, url, *args, **kwargs) - response.raise_for_status() + kwargs["verify"] = self.verify_ssl + + # If there are both 'form data' and 'json' passed - we need to pack them into multipart/form-data + if "data" in kwargs and "json" in kwargs: + files = kwargs.get("files", {}) + files["json"] = (None, json.dumps(kwargs["json"]), "application/json") + del kwargs["json"] + + def try_request(): + response = self.session.request(method, url, *args, **kwargs) + response.raise_for_status() + return response + + try: + response = try_request() + except requests.HTTPError as e: + # If not unauthorized: re-raise + if e.response.status_code != requests.codes.unauthorized: + raise + # Forget api_key + self.api_key = None + # If authenticated using api_key: re-raise + if self.username is None: + raise + # Try to log in + self.login(self.username, self.password) + # Repeat failed request + response = try_request() + return response.json() if not raw else response.content def get(self, *args, **kwargs): diff --git a/src/blob.py b/src/blob.py index b913fe5..105d096 100644 --- a/src/blob.py +++ b/src/blob.py @@ -4,44 +4,64 @@ class MalwarecageBlob(MalwarecageObject): URL_PATTERN = "blob/{id}" + @staticmethod + def create(api, data): + return MalwarecageBlob(api, data) + @lazy_property() def blob_name(self): + """ + Blob name + """ 
return self.data.get("blob_name") @lazy_property() def blob_size(self): + """ + Blob size in bytes + """ return self.data.get("blob_size") @lazy_property() def blob_type(self): + """ + Blob semantic type + """ return self.data.get("blob_type") @property def name(self): """ - Alias for blob_name property + Alias for :py:attr:`blob_name` property """ return self.blob_name @property def size(self): """ - Alias for blob_size property + Alias for :py:attr:`blob_size` property """ return self.blob_size @property def type(self): """ - Alias for blob_type property + Alias for :py:attr:`blob_type` property """ return self.blob_type @lazy_property() def content(self): + """ + Contains blob content + """ return self.data.get("content") @lazy_property() def last_seen(self): + """ + :rtype: datetime.datetime + :return: datetime object when blob was last seen in Malwarecage + """ import dateutil.parser return dateutil.parser.parse(self.data["last_seen"]) if "last_seen" in self.data else None diff --git a/src/comment.py b/src/comment.py new file mode 100644 index 0000000..9c08a79 --- /dev/null +++ b/src/comment.py @@ -0,0 +1,46 @@ +from .object import MalwarecageElement + + +class MalwarecageComment(MalwarecageElement): + """ + Represents comment for Malwarecage object + """ + def __init__(self, api, data, parent): + super(MalwarecageComment, self).__init__(api, data) + self.parent = parent + + @property + def author(self): + """ + Comment author + + :rtype: str + """ + return self.data["author"] + + @property + def timestamp(self): + """ + Comment timestamp + + :rtype: datetime.datetime + """ + import dateutil.parser + return dateutil.parser.parse(self.data["timestamp"]) + + @property + def comment(self): + """ + Comment text + + :rtype: str + """ + return self.data["comment"] + + def delete(self): + """ + Deletes this comment + + :raises: requests.exceptions.HTTPError + """ + self.api.delete("object/{}/comment/{}".format(self.parent.id, self.id)) diff --git a/src/config.py 
b/src/config.py index 36d22a1..229e534 100644 --- a/src/config.py +++ b/src/config.py @@ -4,10 +4,54 @@ class MalwarecageConfig(MalwarecageObject): URL_PATTERN = "config/{id}" + @staticmethod + def create(api, data): + return MalwarecageConfig(api, data) + + def _update(self, data): + if "cfg" in data: + from .blob import MalwarecageBlob + data = dict(data) + data["config"] = {k: (MalwarecageBlob(self.api, {"id": v["in-blob"]}) + if isinstance(v, dict) and "in-blob" in v + else v) + for k, v in data["cfg"].items()} + super(MalwarecageConfig, self)._update(data) + @lazy_property() def family(self): + """ + Configuration family + """ return self.data.get("family") + @lazy_property() + def type(self): + """ + Configuration type ('static' or 'dynamic') + """ + return self.data.get("config_type") + @lazy_property() def cfg(self): + """ + dict object with configuration + """ + return self.data.get("config") + + @lazy_property() + def config_dict(self): + """ + raw dict object with configuration + (in-blob keys are not mapped to :class:`MalwarecageBlob` objects) + """ return self.data.get("cfg") + + @property + def config(self): + """ + dict object with configuration + + .. seealso:: :py:attr:`config_dict` + """ + return self.cfg diff --git a/src/core.py b/src/core.py index 2780925..eca23de 100644 --- a/src/core.py +++ b/src/core.py @@ -11,23 +11,50 @@ class Malwarecage(object): - def __init__(self, api=None): - self.api = api or MalwarecageAPI() + """ + Main object used for communication with Malwarecage + + :param api: Custom :class:`MalwarecageAPI` used to communicate with Malwarecage + :type api: :class:`MalwarecageAPI`, optional + :param api_key: API key used for authentication (omit if password-based authentication is used) + :type api_key: str, optional + + Usage example: + + .. 
code-block:: python + + from mwdblib import Malwarecage + + mwdb = Malwarecage() + mwdb.login("example", "") + + file = mwdb.query_file("3629344675705286607dd0f680c66c19f7e310a1") + + """ + + def __init__(self, api=None, api_key=None): + self.api = api or MalwarecageAPI(api_key=api_key) def login(self, username, password): """ Performs user authentication using provided username and password. - If you want to authenticate using API key - initialize Malwarecage object with MalwarecageAPI instance + + .. warning:: + + Keep in mind that password-authenticated sessions are short lived, so password needs to be stored + in :class:`MalwarecageAPI` object. Ask Malwarecage instance administrator for an API key (or send e-mail to + info@cert.pl if you use mwdb.cert.pl) + + .. versionadded:: 2.4.0 + Malwarecage tries to reauthenticate on first Unauthorized exception :param username: User name + :type username: str :param password: Password + :type password: str :raises: requests.exceptions.HTTPError """ - result = self.api.post("auth/login", json={ - "login": username, - "password": password - }, noauth=True) - self.api.set_api_key(result["token"]) + self.api.login(username, password) def _recent(self, endpoint, query=None): try: @@ -48,10 +75,25 @@ def _recent(self, endpoint, query=None): def recent_objects(self): """ Retrieves recently uploaded objects - If you already know type of object you are looking for - use specialized variants: - - recent_files - - recent_configs - - recent_blobs + If you already know type of object you are looking for, use specialized variants: + + - :py:meth:`recent_files` + - :py:meth:`recent_configs` + - :py:meth:`recent_blobs` + + Usage example: + + .. code-block:: python + + from mwdblib import Malwarecage + from itertools import islice + + mwdb = Malwarecage() + mwdb.login("admin", "password123") + + # recent_files is generator, do not execute list(recent_files)! 
+ files = islice(mwdb.recent_files(), 25) + print([(f.name, f.tags) for f in files]) :rtype: Iterator[:class:`MalwarecageObject`] :raises: requests.exceptions.HTTPError @@ -85,63 +127,96 @@ def recent_blobs(self): """ return self._recent("blob") - def query(self, hash): + def _query(self, object_type, hash, raise_not_found): + try: + result = self.api.get(object_type.URL_PATTERN.format(id=hash)) + return object_type.create(self.api, result) + except requests.exceptions.HTTPError as e: + if not raise_not_found and e.response.status_code == requests.codes.not_found: + return None + else: + raise + + def query(self, hash, raise_not_found=True): """ Queries for object using provided hash. - If you already know type of object you are looking for - use specialized variants: - - query_file - - query_config - - query_blob + If you already know type of object you are looking for, use specialized variants: + + - :py:meth:`query_file` + - :py:meth:`query_config` + - :py:meth:`query_blob` + + .. versionadded:: 2.4.0 + Added raise_not_found optional argument :param hash: Object hash (identifier, MD5, SHA-1, SHA-2) - :rtype: :class:`MalwarecageObject` + :type hash: str + :param raise_not_found: If True (default), method raises HTTPError when object is not found + :type raise_not_found: bool, optional + :rtype: :class:`MalwarecageObject` or None (if raise_not_found=False) :raises: requests.exceptions.HTTPError """ - result = self.api.get("object/{}".format(hash)) - return MalwarecageObject.create(self.api, result) + return self._query(MalwarecageObject, hash, raise_not_found) - def query_file(self, hash): + def query_file(self, hash, raise_not_found=True): """ Queries for file using provided hash :param hash: Object hash (identifier, MD5, SHA-1, SHA-2) - :rtype: :class:`MalwarecageFile` + :type hash: str + :param raise_not_found: If True (default), method raises HTTPError when object is not found + :type raise_not_found: bool + :rtype: :class:`MalwarecageFile` or None (if 
raise_not_found=False) :raises: requests.exceptions.HTTPError """ - result = self.api.get("file/{}".format(hash)) - return MalwarecageFile(self.api, result) + return self._query(MalwarecageFile, hash, raise_not_found) - def query_config(self, hash): + def query_config(self, hash, raise_not_found=True): """ Queries for configuration object using provided hash - :param hash: Object hash (identifier, MD5, SHA-1, SHA-2) - :rtype: :class:`MalwarecageConfig` + :param hash: Object hash (SHA-256 identifier) + :type hash: str + :param raise_not_found: If True (default), method raises HTTPError when object is not found + :type raise_not_found: bool + :rtype: :class:`MalwarecageConfig` or None (if raise_not_found=False) :raises: requests.exceptions.HTTPError """ - result = self.api.get("config/{}".format(hash)) - return MalwarecageConfig(self.api, result) + return self._query(MalwarecageConfig, hash, raise_not_found) - def query_blob(self, hash): + def query_blob(self, hash, raise_not_found=True): """ Queries for blob object using provided hash - :param hash: Object hash (identifier, MD5, SHA-1, SHA-2) - :rtype: :class:`MalwarecageConfig` + :param hash: Object hash (SHA-256 identifier) + :type hash: str + :param raise_not_found: If True (default), method raises HTTPError when object is not found + :type raise_not_found: bool + :rtype: :class:`MalwarecageBlob` or None (if raise_not_found=False) :raises: requests.exceptions.HTTPError """ - result = self.api.get("blob/{}".format(hash)) - return MalwarecageBlob(self.api, result) + return self._query(MalwarecageBlob, hash, raise_not_found) def search(self, query): """ Advanced search for objects using Lucene syntax. 
- If you already know type of object you are looking for - use specialized variants: - - search_files - - search_configs - - search_blobs + If you already know type of object you are looking for, use specialized variants: + + - :py:meth:`search_files` + - :py:meth:`search_configs` + - :py:meth:`search_blobs` + + Usage example: + + .. code-block:: python + + from mwdblib import Malwarecage + + # Search for samples tagged as evil and with size less than 100kB + results = mwdb.search_files("tag:evil AND file.size:[0 TO 100000]") :param query: Search query + :type query: str :rtype: Iterator[:class:`MalwarecageObject`] :raises: requests.exceptions.HTTPError """ @@ -154,6 +229,7 @@ def search_files(self, query): Advanced search for files using Lucene syntax. :param query: Search query + :type query: str :rtype: Iterator[:class:`MalwarecageFile`] :raises: requests.exceptions.HTTPError """ @@ -164,6 +240,7 @@ def search_configs(self, query): Advanced search for configuration objects using Lucene syntax. :param query: Search query + :type query: str :rtype: Iterator[:class:`MalwarecageConfig`] :raises: requests.exceptions.HTTPError """ @@ -174,11 +251,25 @@ def search_blobs(self, query): Advanced search for blob objects using Lucene syntax. 
:param query: Search query + :type query: str :rtype: Iterator[:class:`MalwarecageBlob`] :raises: requests.exceptions.HTTPError """ return self._recent("blob", query) + @staticmethod + def _convert_bytes(data): + if isinstance(data, dict): + return dict(map(Malwarecage._convert_bytes, data.items())) + + if isinstance(data, bytes): + return data.decode('utf-8', 'replace') + + if isinstance(data, (tuple, list)): + return list(map(Malwarecage._convert_bytes, data)) + + return data + def _upload(self, type, parent=None, metakeys=None, share_with=None, private=False, public=False, req_files=None, req_json=None): @@ -193,9 +284,11 @@ def _upload(self, type, parent=None, metakeys=None, if isinstance(metakeys, dict): metakeys = [{"key": key, "value": value} - for key, value_list in metakeys.iteritems() + for key, value_list in metakeys.items() for value in (value_list if isinstance(value_list, list) else [value_list])] + if private and public: + raise ValueError("Sample can't be both private and public") if public: share_with = "public" if private: @@ -204,45 +297,87 @@ def _upload(self, type, parent=None, metakeys=None, result = self.api.post("{}/{}".format(type, parent), data={ 'metakeys': json.dumps({'metakeys': metakeys}), 'upload_as': share_with or "*" - }, files=req_files, json=req_json) + }, files=req_files, json=self._convert_bytes(req_json)) return result def upload_file(self, name, content, **kwargs): """ Upload file object - :param name: Original file name + :param name: Original file name (see also :py:attr:`MalwarecageFile.file_name`) + :type name: str :param content: File contents - :param parent: (optional) Parent object or parent identifier - :param metakeys: (optional) Dictionary with metakeys. 
- If you want to set many values with the same key: use list - :param share_with: (optional) Group name you want to share object with - :param private: (optional) True if sample should be uploaded as private - :param public: (optional) True if sample should be visible for everyone + :type content: bytes + :param parent: Parent object or parent identifier + :type parent: :class:`MalwarecageObject` or str, optional + :param metakeys: Dictionary with metakeys. + If you want to set many values with the same key: use list as value + :type metakeys: dict, optional + :param share_with: Group name you want to share object with + :type share_with: str, optional + :param private: True if sample should be uploaded as private + :type private: bool, optional + :param public: True if sample should be visible for everyone + :type public: bool, optional :rtype: :class:`MalwarecageFile` - :raises: requests.exceptions.HTTPError + :raises: :class:`requests.exceptions.HTTPError`, :class:`ValueError` + + Usage example: + + .. code-block:: python + + mwdb.upload_file( + "malware.exe", + open("malware.exe", "rb").read(), + parent="3629344675705286607dd0f680c66c19f7e310a1", + public=True) """ result = self._upload("file", req_files={'file': (name, content)}, **kwargs) return MalwarecageFile(self.api, result) - def upload_config(self, family, cfg, **kwargs): + def upload_config(self, family, cfg, config_type="static", **kwargs): """ Upload configuration object - :param family: Malware family name - :param cfg: Dict object with configuration - :param parent: (optional) Parent object or parent identifier - :param metakeys: (optional) Dictionary with metakeys. 
- If you want to set many values with the same key: use list - :param share_with: (optional) Group name you want to share object with - :param private: (optional) True if sample should be uploaded as private - :param public: (optional) True if sample should be visible for everyone + :param family: Malware family name (see also :py:attr:`MalwarecageConfig.family`) + :type family: str + :param cfg: Dict object with configuration (see also :py:attr:`MalwarecageConfig.cfg`) + :type cfg: dict + :param config_type: Configuration type (default: static, see also :py:attr:`MalwarecageConfig.type`) + :type config_type: str, optional + :param parent: Parent object or parent identifier + :type parent: :class:`MalwarecageObject` or str, optional + :param metakeys: Dictionary with metakeys. + If you want to set many values with the same key: use list as value + :type metakeys: dict, optional + :param share_with: Group name you want to share object with + :type share_with: str, optional + :param private: True if sample should be uploaded as private + :type private: bool, optional + :param public: True if sample should be visible for everyone + :type public: bool, optional :rtype: :class:`MalwarecageConfig` - :raises: requests.exceptions.HTTPError + :raises: :class:`requests.exceptions.HTTPError`, :class:`ValueError` + + .. 
code-block:: python + + mwdb.upload_config( + "evil", + { + "botnet": "mal0123", + "version": 2019, + "urls": [ + "http://example.com", + "http://example.com/2" + ] + }, + parent="3629344675705286607dd0f680c66c19f7e310a1", + public=True) """ result = self._upload("config", req_json={ "family": family, - "cfg": cfg + "cfg": cfg, + "config_type": config_type }, **kwargs) return MalwarecageConfig(self.api, result) @@ -250,18 +385,25 @@ def upload_blob(self, name, type, content, **kwargs): """ Upload blob object - :param name: Blob name - :param type: Blob type - :param content: Blob content - :param cfg: Dict object with configuration - :param parent: (optional) Parent object or parent identifier - :param metakeys: (optional) Dictionary with metakeys. 
+ If you want to set many values with the same key: use list as value + :type metakeys: dict, optional + :param share_with: Group name you want to share object with + :type share_with: str, optional + :param private: True if sample should be uploaded as private + :type private: bool, optional + :param public: True if sample should be visible for everyone + :type public: bool, optional :rtype: :class:`MalwarecageBlob` - :raises: requests.exceptions.HTTPError + :raises: :class:`requests.exceptions.HTTPError`, :class:`ValueError` """ result = self._upload("blob", req_json={ "blob_name": name, diff --git a/src/file.py b/src/file.py index 8194b6a..c3b7d6b 100644 --- a/src/file.py +++ b/src/file.py @@ -6,6 +6,10 @@ class MalwarecageFile(MalwarecageObject): URL_PATTERN = "file/{id}" + @staticmethod + def create(api, data): + return MalwarecageFile(api, data) + @lazy_property() def md5(self): return self.data.get("md5") @@ -28,46 +32,71 @@ def ssdeep(self): @lazy_property() def file_name(self): + """ + Sample original name + """ return self.data.get("file_name") @lazy_property() def file_size(self): + """ + Sample size in bytes + """ return self.data.get("file_size") @lazy_property() def file_type(self): + """ + Sample type + """ return self.data.get("file_type") @property def name(self): """ - Alias for file_name property + Alias for :py:attr:`file_name` property """ return self.file_name @property def size(self): """ - Alias for file_size property + Alias for :py:attr:`file_size` property """ return self.file_size @property def type(self): """ - Alias for file_type property + Alias for :py:attr:`file_type` property """ return self.file_type def download(self): """ Downloads file contents + :return: File contents + :rtype: str + + Example - download first file with size less than 1000 bytes and VBS extension + + .. 
code-block:: python + + dropper = next(mwdb.search_files('file.size:[0 TO 1000] AND file.name:"*.vbs"')) + + with open(dropper.file_name, "wb") as f: + f.write(dropper.download()) + + print("Downloaded {}".format(dropper.file_name)) """ token = self.api.post("request/sample/{id}".format(**self.data))["url"].split("/")[-1] return self.api.get("download/{}".format(token), raw=True) def download_content(self): + """ + .. deprecated:: 2.3.0 + Use :py:meth:`download` instead. + """ warnings.warn("download_content() is deprecated. Use download() method.", DeprecationWarning) return self.download() - diff --git a/src/object.py b/src/object.py index bdfbb92..8909b55 100644 --- a/src/object.py +++ b/src/object.py @@ -2,7 +2,11 @@ from functools import wraps -def lazy_property(url_pattern=None): +class PropertyUnloaded(RuntimeError): + pass + + +def lazy_property(url_pattern=None, nullable=False): def wrapper(f): @property @wraps(f) @@ -10,10 +14,15 @@ def wrapped_property(self): url = (url_pattern or getattr(self, "URL_PATTERN", None)).format(**self.data) property = f.__name__ mapper = getattr(self, "mapper_{}".format(property), lambda d: d) - if f(self) is None: + try: + result = f(self) + if result is None and not nullable: + raise PropertyUnloaded() + return result + except PropertyUnloaded: data = self.api.get(url) self._update(mapper(data)) - return f(self) + return f(self) return wrapped_property return wrapper @@ -34,38 +43,32 @@ def id(self): """ return self.data["id"] - @property - def sha256(self): - return self.id - - -class MalwarecageComment(MalwarecageElement): - def __init__(self, api, data, parent): - super(MalwarecageComment, self).__init__(api, data) - self.parent = parent - - @property - def author(self): - return self.data["author"] - - @property - def timestamp(self): - return self.data["timestamp"] - - @property - def comment(self): - return self.data["comment"] - - def delete(self): - self.api.delete("object/{}/comment/{}".format(self.parent.id, 
self.id)) - class MalwarecageObject(MalwarecageElement): """ - Represents generic Malwarecage object + Represents abstract, generic Malwarecage object. + + Should never be instantiated directly. + + If you really need to get synthetic instance - use internal :py:meth:`create` static method. """ URL_PATTERN = "object/{id}" + def _update(self, data): + from .config import MalwarecageConfig + if "config" not in data: + data = dict(data) + if "latest_config" in data: + data["config"] = MalwarecageConfig(self.api, data["latest_config"]) + elif "children" in data: + """ + If there are children but no latest_config: probably API is in old version + Try to emulate + """ + config = next((child for child in data["children"] if child["type"] == "static_config"), None) + data["config"] = config and MalwarecageConfig(self.api, config) + super(MalwarecageObject, self)._update(data) + @staticmethod def create(api, data): from .file import MalwarecageFile @@ -82,6 +85,13 @@ def create(api, data): else: return None + @property + def sha256(self): + """ + Object identifier (sha256) + """ + return self.id + def mapper_tags(self, data): return {"tags": data} @@ -89,6 +99,8 @@ def mapper_tags(self, data): def tags(self): """ Returns list of tags + + :rtype: list[str] :return: List of tags """ return [t["tag"] for t in self.data["tags"]] if "tags" in self.data else None @@ -100,15 +112,43 @@ def mapper_comments(self, data): def comments(self): """ Returns list of comments + + :rtype: list[:class:`mwdblib.comment.MalwarecageComment`] :return: List of comment objects + + Example - print all comments of last object commented as "malware": + + .. 
code-block:: python + + comments = next(mwdb.search_files('comment:"*malware*"')).comments + for comment in comments: + print("{} {}".format(comment.author, comment.comment)) """ + from .comment import MalwarecageComment return list(map(lambda c: MalwarecageComment(self.api, c, self), self.data["comments"])) \ if "comments" in self.data else None + def mapper_shares(self, data): + return {"shares": data.get("shares", [])} + + @lazy_property("object/{id}/share") + def shares(self): + """ + Returns list of shares + + :rtype: list[:class:`mwdblib.share.MalwarecageShare`] + :return: List of share objects + """ + from .share import MalwarecageShare + return list(map(lambda s: MalwarecageShare(self.api, s, self), self.data["shares"])) \ + if "shares" in self.data else None + @lazy_property("object/{id}/meta") def metakeys(self): """ Returns dict object with metakeys. + + :rtype: dict :return: Dict object containing metakey attributes """ if "metakeys" not in self.data: @@ -122,6 +162,8 @@ def metakeys(self): def upload_time(self): """ Returns timestamp of first object upload + + :rtype: :class:`datetime.datetime` :return: datetime object with object upload timestamp """ import dateutil.parser @@ -131,6 +173,8 @@ def upload_time(self): def parents(self): """ Returns list of parent objects + + :rtype: List[:class:`MalwarecageObject`] :return: List of parent objects """ return list(map(lambda o: MalwarecageObject.create(self.api, o), self.data["parents"])) \ @@ -140,15 +184,31 @@ def parents(self): def children(self): """ Returns list of child objects + + :rtype: List[:class:`MalwarecageObject`] :return: List of child objects """ return list(map(lambda o: MalwarecageObject.create(self.api, o), self.data["children"])) \ if "children" in self.data else None + @lazy_property(nullable=True) + def config(self): + """ + Returns latest config related with this object + + :rtype: :class:`MalwarecageConfig` or None + :return: Latest configuration if found + """ + if "config" not in 
self.data: + raise PropertyUnloaded() + return self.data["config"] + def add_child(self, child): """ Adds reference to child with current object as parent - :param child: MalwarecageObject object + + :param child: Object + :type child: MalwarecageObject """ self.api.put("object/{parent}/child/{child}".format(parent=self.id, child=child.id)) if "children" in self.data: @@ -157,7 +217,9 @@ def add_child(self, child): def add_tag(self, tag): """ Tags object using specified tag + :param tag: Tag string + :type tag: str """ self.api.put("object/{id}/tag".format(**self.data), json={ "tag": tag @@ -168,7 +230,9 @@ def add_tag(self, tag): def remove_tag(self, tag): """ Untags object using specified tag + :param tag: Tag string + :type tag: str """ self.api.delete("object/{id}/tag".format(**self.data), params={ "tag": tag @@ -179,7 +243,9 @@ def remove_tag(self, tag): def add_comment(self, comment): """ Adds comment + :param comment: Comment string + :type comment: str """ self.api.post("object/{id}/comment".format(**self.data), json={ "comment": comment @@ -190,8 +256,11 @@ def add_comment(self, comment): def add_metakey(self, key, value): """ Adds metakey attribute + :param key: Attribute key + :type key: str :param value: Attribute value + :type value: str """ self.api.post("object/{id}/meta".format(**self.data), json={ "key": key, @@ -200,7 +269,7 @@ def add_metakey(self, key, value): def flush(self): """ - Flushes local object state. + Flushes local object state in case of pending updates. 
All object-specific properties will be lazy-loaded using API """ self.data = {"id": self.data["id"]} diff --git a/src/share.py b/src/share.py new file mode 100644 index 0000000..1b1f179 --- /dev/null +++ b/src/share.py @@ -0,0 +1,102 @@ +import re + +from .object import MalwarecageElement, MalwarecageObject + + +class MalwarecageShareReason(object): + """ + Represents the reason why object was shared with specified group + """ + def __init__(self, api, access_reason): + self.api = api + self._reason = access_reason + self._data = {} + reason_match = re.match(r"^([A-Za-z]+) [a-z_]+:([0-9A-Za-z]+) by user:([0-9A-Za-z_\-]+)$", access_reason) + if reason_match: + self._data = { + "why": reason_match.group(1), + "what": reason_match.group(2), + "who": reason_match.group(3) + } + + @property + def what(self): + """ + Returns what was shared + + :rtype: :class:`mwdblib.MalwarecageObject` or None + """ + _what = self._data.get("what") + if isinstance(_what, str): + result = self.api.get("object/{}".format(_what)) + self._data["what"] = MalwarecageObject.create(self.api, result) + return self._data["what"] + else: + return _what + + @property + def why(self): + """ + Returns why it was shared + + :return: One of actions: 'queried', 'shared', 'added' + """ + if "why" not in self._data: + return None + return self._data["why"].lower() + + @property + def who(self): + """ + Returns who caused action returned by :py:attr:`why` property. 
+ + :return: User login + """ + if "who" not in self._data: + return None + return self._data["who"] + + def __str__(self): + """ + Returns str with unparsed reason string (useful for custom reason entries) + """ + return self._reason + + +class MalwarecageShare(MalwarecageElement): + """ + Represents share entry in Malwarecage object + """ + def __init__(self, api, data, parent): + super(MalwarecageShare, self).__init__(api, data) + self.parent = parent + + @property + def timestamp(self): + """ + Returns timestamp of share + + :return: datetime object with object share timestamp + :rtype: datetime.datetime + """ + import dateutil.parser + return dateutil.parser.parse(self.data["access_time"]) + + @property + def group(self): + """ + Returns a group name that object is shared with + + :return: group name + :rtype: str + """ + return self.data["group_name"] + + @property + def reason(self): + """ + Returns why object was shared + + :rtype: :class:`MalwarecageShareReason` + """ + return MalwarecageShareReason(self.api, self.data["access_reason"]) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/e2etest_mwdblib.py b/tests/e2etest_mwdblib.py new file mode 100644 index 0000000..22edd85 --- /dev/null +++ b/tests/e2etest_mwdblib.py @@ -0,0 +1,94 @@ +import os +from src import Malwarecage +import unittest +from itertools import islice +from tests.request_counter import RequestsCounter + + +class TestE2E(unittest.TestCase): + def get_mwdb(self): + username = os.environ["MWDB_USER"] + password = os.environ["MWDB_PASS"] + + mwdb = Malwarecage() + mwdb.login(username, password) + return mwdb + + def test_login(self): + with RequestsCounter(1): + self.get_mwdb() + + def read_common(self, obj): + obj.id + obj.sha256 + obj.tags + # obj.metakeys - not allowed + obj.comments + obj.upload_time + obj.parents + obj.children + + def test_lazy_load(self): + mwdb = self.get_mwdb() + for obj in islice(mwdb.recent_objects(), 
10): + with RequestsCounter(0): + obj.id + obj.sha256 + obj.tags + obj.upload_time + with RequestsCounter(1): + obj.parents + obj.children + with RequestsCounter(1): + obj.comments + + def test_recent_objects(self): + mwdb = self.get_mwdb() + for obj in islice(mwdb.recent_objects(), 10): + with RequestsCounter(2): + self.read_common(obj) + + def test_recentfiles(self): + mwdb = self.get_mwdb() + for obj in islice(mwdb.recent_files(), 10): + with RequestsCounter(2): + self.read_common(obj) + with RequestsCounter(0): + obj.md5 + obj.sha1 + obj.sha256 + obj.sha512 + obj.crc32 + obj.ssdeep + obj.file_name + obj.file_size + obj.file_type + obj.name + obj.size + obj.type + with RequestsCounter(2): + obj.download() + + def test_recentconfigs(self): + mwdb = self.get_mwdb() + for obj in islice(mwdb.recent_configs(), 10): + with RequestsCounter(2): + self.read_common(obj) + with RequestsCounter(0): + obj.family + obj.cfg + + def test_recentblobs(self): + mwdb = self.get_mwdb() + for obj in islice(mwdb.recent_blobs(), 10): + with RequestsCounter(2): + self.read_common(obj) + with RequestsCounter(0): + obj.blob_name + obj.blob_size + obj.blob_type + obj.name + obj.size + obj.type + obj.content + obj.last_seen diff --git a/tests/request_counter.py b/tests/request_counter.py new file mode 100644 index 0000000..7b2fa0c --- /dev/null +++ b/tests/request_counter.py @@ -0,0 +1,28 @@ +from requests import Session + + +class RequestCountMismatchException(Exception): + pass + + +class RequestsCounter: + def __init__(self, expect_requests): + self.monkey_patch = None + self.expect_requests = expect_requests + self.counter = 0 + + def __enter__(self): + def wrap(*args, **kwargs): + self.counter += 1 + return self.monkey_patch(*args, **kwargs) + + self.monkey_patch = Session.request + Session.request = wrap + + def __exit__(self, type, value, traceback): + Session.request = self.monkey_patch + if self.counter != self.expect_requests: + raise RequestCountMismatchException( + self.counter, 
+ self.expect_requests + ) diff --git a/tests/test_mwdblib.py b/tests/test_mwdblib.py new file mode 100644 index 0000000..05ddaa6 --- /dev/null +++ b/tests/test_mwdblib.py @@ -0,0 +1,14 @@ +import unittest + + +class TestPublicApi(unittest.TestCase): + def test_public_api(self): + """ + These imports are part of the public API and should never be removed + """ + from src import Malwarecage # noqa + from src import MalwarecageAPI # noqa + from src import MalwarecageFile # noqa + from src import MalwarecageObject # noqa + from src import MalwarecageConfig # noqa + from src import MalwarecageBlob # noqa