From 8a71ce50859f0797533e77e75e7e0816d1c5632a Mon Sep 17 00:00:00 2001
From: paultranvan
Date: Tue, 31 Jan 2023 16:52:54 +0100
Subject: [PATCH] feat: Add timeseries purge script

When operating a server, the `Stage_timeseries` database can become
quite big.
In the case where only the `Stage_analysis_timeseries` is actually
useful after the pipeline execution, the user's timeseries can be
deleted to speed up the pipeline and gain some disk space.
---
 bin/purge_user_timeseries.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 bin/purge_user_timeseries.py

diff --git a/bin/purge_user_timeseries.py b/bin/purge_user_timeseries.py
new file mode 100644
index 000000000..2c1fbf767
--- /dev/null
+++ b/bin/purge_user_timeseries.py
@@ -0,0 +1,51 @@
+"""Delete a user's timeseries entries older than the pipeline's last_ts_run.
+
+Usage: purge_user_timeseries (-e USER_EMAIL | -u USER_UUID)
+
+Exits with status 1 when the user or a cutoff timestamp cannot be found.
+"""
+import logging
+import argparse
+import sys
+import uuid
+from datetime import datetime
+import emission.core.wrapper.user as ecwu
+import emission.core.get_database as edb
+import emission.core.wrapper.pipelinestate as ecwp
+import emission.storage.pipeline_queries as esp
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.DEBUG)
+
+    parser = argparse.ArgumentParser(prog="purge_user_timeseries")
+    # Exactly one way of identifying the user must be given.
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("-e", "--user_email")
+    group.add_argument("-u", "--user_uuid")
+
+    args = parser.parse_args()
+
+    if args.user_uuid:
+        user_id = uuid.UUID(args.user_uuid)
+    else:
+        user = ecwu.User.fromEmail(args.user_email)
+        # NOTE(review): presumably fromEmail yields None for an unknown
+        # email -- confirm; the guard avoids an AttributeError on `.uuid`.
+        if user is None:
+            logging.error("No user found for email {}".format(args.user_email))
+            sys.exit(1)
+        user_id = user.uuid
+
+    cstate = esp.get_current_state(user_id, ecwp.PipelineStages.CREATE_CONFIRMED_OBJECTS)
+    # NOTE(review): presumably no state is returned when this stage never
+    # ran for the user -- confirm; the guard avoids subscripting None.
+    last_ts_run = cstate['last_ts_run'] if cstate is not None else None
+
+    if not last_ts_run:
+        logging.warning("No processed timeserie for user {}".format(user_id))
+        sys.exit(1)
+
+    # Only entries whose metadata.write_ts is strictly earlier than
+    # last_ts_run are removed; anything written at or after it is kept.
+    res = edb.get_timeseries_db().delete_many({"user_id": user_id, "metadata.write_ts": { "$lt": last_ts_run}})
+    logging.info("{} deleted entries before {}".format(res.deleted_count, datetime.fromtimestamp(last_ts_run)))