Skip to content

Commit

Permalink
Make DB timestamps TZ-aware
Browse files Browse the repository at this point in the history
  • Loading branch information
nothingface0 committed Sep 6, 2023
1 parent dfb2f1f commit 7da5698
Show file tree
Hide file tree
Showing 11 changed files with 185 additions and 128 deletions.
46 changes: 23 additions & 23 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.11"]

# Service containers to run with `container-job`
services:
# Label used to access the service container
Expand All @@ -32,27 +32,27 @@ jobs:
--health-timeout 5s
--health-retries 5
steps:
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
- run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
- run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
- name: Test with pytest
run: |
pytest tests
env:
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
- run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
- run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
- name: Test with pytest
run: |
pytest tests -s
env:
# Env vars for pytest
POSTGRES_USERNAME: postgres
POSTGRES_HOST: localhost
Expand All @@ -61,4 +61,4 @@ jobs:
POSTGRES_PLAYBACK_DB_NAME: postgres_playback_test
POSTGRES_PORT: 5432
DQM_CR_USERNAMES: "user:password"
- run: echo "🍏 This job's status is ${{ job.status }}."
- run: echo "🍏 This job's status is ${{ job.status }}."
74 changes: 51 additions & 23 deletions db.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
### DQM^2 Mirror DB === >
import os
import sys
import psycopg2
import sqlalchemy
from sqlalchemy_utils import database_exists
Expand All @@ -7,6 +9,12 @@
from datetime import datetime
from exceptions import DatabaseNotFoundError

sys.path.append(os.path.join(os.path.dirname(__file__), "."))

from dqmsquare_cfg import TZ

DEFAULT_DATETIME = TZ.localize(datetime(2012, 3, 3, 10, 10, 10, 0))


class DQM2MirrorDB:
"""
Expand All @@ -15,7 +23,7 @@ class DQM2MirrorDB:
"""

TB_NAME_RUNS = "runs"
TB_DESCRIPTION_RUNS = "( id TEXT PRIMARY KEY NOT NULL, client TEXT, run INT, rev INT, hostname TEXT, exit_code INT, events_total INT, events_rate REAL, cmssw_run INT, cmssw_lumi INT, client_path TEXT, runkey TEXT, fi_state TEXT, timestamp TIMESTAMP, vmrss TEXT, stdlog_start TEXT, stdlog_end TEXT )"
TB_DESCRIPTION_RUNS = "( id TEXT PRIMARY KEY NOT NULL, client TEXT, run INT, rev INT, hostname TEXT, exit_code INT, events_total INT, events_rate REAL, cmssw_run INT, cmssw_lumi INT, client_path TEXT, runkey TEXT, fi_state TEXT, timestamp TIMESTAMP WITH TIME ZONE, vmrss TEXT, stdlog_start TEXT, stdlog_end TEXT )"
TB_DESCRIPTION_RUNS_SHORT = [
"id",
"client",
Expand All @@ -38,7 +46,7 @@ class DQM2MirrorDB:
TB_DESCRIPTION_RUNS_SHORT_NOLOGS = "id , client , run , rev , hostname , exit_code , events_total , events_rate , cmssw_run , cmssw_lumi , client_path , runkey , fi_state, timestamp, vmrss"

TB_NAME_GRAPHS = "graphs"
TB_DESCRIPTION_GRAPHS = "( run INT PRIMARY KEY NOT NULL, rev INT, id TEXT, timestamp TIMESTAMP, global_start TIMESTAMP, stream_data TEXT, hostname TEXT )"
TB_DESCRIPTION_GRAPHS = "( run INT PRIMARY KEY NOT NULL, rev INT, id TEXT, timestamp TIMESTAMP WITH TIME ZONE, global_start TIMESTAMP WITH TIME ZONE, stream_data TEXT, hostname TEXT )"
TB_DESCRIPTION_GRAPHS_SHORT = [
"run",
"rev",
Expand Down Expand Up @@ -165,26 +173,29 @@ def fill_graph(self, header: dict, document: dict) -> int:

rev = header.get("_rev", -1)
timestamp = extra.get(
"timestamp", datetime(2012, 3, 3, 10, 10, 10, 0).timestamp()
"timestamp",
DEFAULT_DATETIME,
)
global_start = extra.get(
"global_start", datetime(2012, 3, 3, 10, 10, 10, 0).timestamp()
"global_start",
DEFAULT_DATETIME,
)

stream_data = str(extra.get("streams", ""))
hostname = header.get("hostname", "")

# Make timestamps TZ aware
if not isinstance(global_start, datetime):
try:
global_start = datetime.fromtimestamp(global_start)
global_start = TZ.localize(datetime.fromtimestamp(global_start))
except Exception as e:
self.log.warning(
f"Could not parse {global_start} as a timestamp. Error: '{repr(e)}'"
)

if not isinstance(timestamp, datetime):
try:
timestamp = datetime.fromtimestamp(timestamp)
timestamp = TZ.localize(datetime.fromtimestamp(timestamp))
except Exception as e:
self.log.warning(
f"Could not parse {timestamp} as a timestamp. Error: '{repr(e)}'"
Expand Down Expand Up @@ -213,7 +224,10 @@ def fill_graph(self, header: dict, document: dict) -> int:

return 0

def get_graphs_data(self, run) -> list:
def get_graphs_data(self, run: int) -> list:
"""
Load graph data for a specific run
"""
self.log.debug("DQM2MirrorDB.get_graphs_data() - " + str(run))
with self.engine.connect() as cur:
answer = cur.execute(
Expand All @@ -229,9 +243,9 @@ def get_graphs_data(self, run) -> list:

return answer

def fill(self, header: dict, document: dict) -> int:
def fill_run(self, header: dict, document: dict) -> int:
"""
fill 'runs' table with clients data
Fill 'runs' table with clients' data
"""
id = header.get("_id")
client = header.get("tag", "")
Expand All @@ -253,14 +267,17 @@ def fill(self, header: dict, document: dict) -> int:
pass
fi_state = document.get("fi_state", "")
timestamp = header.get(
"timestamp", datetime(2012, 3, 3, 10, 10, 10, 0).timestamp()
"timestamp",
DEFAULT_DATETIME,
)
try:
timestamp = datetime.fromtimestamp(timestamp)
except Exception as e:
self.log.warn(
f"Timestamp {timestamp} could not be cast to datetime: {repr(e)}"
)
if not isinstance(timestamp, datetime):
try:
# Make timestamp TZ aware
timestamp = TZ.localize(datetime.fromtimestamp(timestamp))
except Exception as e:
self.log.warning(
f"Timestamp {timestamp} could not be cast to datetime: {repr(e)}"
)
extra = document.get("extra", {})
ps_info = extra.get("ps_info", {})
VmRSS = ps_info.get("VmRSS", "")
Expand Down Expand Up @@ -288,7 +305,7 @@ def fill(self, header: dict, document: dict) -> int:
stdlog_end,
)
self.log.debug(
f"DQM2MirrorDB.fill() - {str(values[:-2])}, {str(values[-2][:10])}..{str(values[-2][-10:])}, {str(values[-1][:10])}..{str(values[-1][-10:])}"
f"DQM2MirrorDB.fill_run() - {str(values[:-2])}, {str(values[-2][:10])}..{str(values[-2][-10:])}, {str(values[-1][:10])}..{str(values[-1][-10:])}"
)
values_dic = {}
for val, name in zip(values, self.TB_DESCRIPTION_RUNS_SHORT):
Expand Down Expand Up @@ -338,9 +355,15 @@ def fill(self, header: dict, document: dict) -> int:

return 0

def get(self, run_start, run_end, bad_only=False, with_ls_only=False):
def get(
self,
run_start: int,
run_end: int,
bad_only: bool = False,
with_ls_only: bool = False,
) -> list[tuple]:
"""
get data from 'runs' table with client's data
Get data from 'runs' table.
"""
self.log.debug("DQM2MirrorDB.get() - " + str(run_start) + " " + str(run_end))
with self.engine.connect() as cur:
Expand All @@ -362,7 +385,10 @@ def get(self, run_start, run_end, bad_only=False, with_ls_only=False):
self.log.debug(f"Read DB for runs {run_start}-{run_end}: {answer}")
return answer

def make_mirror_entry(self, data):
def format_run_data(self, data):
"""
Given run data from the DB, format them for the front-end.
"""
answer = []
(
id,
Expand All @@ -381,12 +407,14 @@ def make_mirror_entry(self, data):
timestamp,
VmRSS,
) = data

client = self.get_short_client_name(client)
# Hide part of the hostname for safety reasons
var = hostname.split("-")
hostname = "..".join([var[0], var[-1]])
td = datetime.now() - timestamp

# Timestamp is of type datetime, and is tz-aware,
# as it's coming straight from the DB.
td = TZ.localize(datetime.now()) - timestamp
days = int(td.days)
hours = int((td.seconds / (60 * 60)) % 24)
minutes = int((td.seconds / 60) % 60)
Expand Down Expand Up @@ -485,7 +513,7 @@ def get_mirror_data(self, run_number: int) -> tuple:
Returns a tuple: global_data and clients_data (???)
"""
runs = self.get(run_number, run_number)
runs_out = [self.make_mirror_entry(run) for run in runs]
runs_out = [self.format_run_data(run) for run in runs]
clients_data = [run[0] for run in runs_out]
global_data = runs_out[0][1] if runs_out else []
return global_data, clients_data
Expand Down
6 changes: 6 additions & 0 deletions dqmsquare_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@
"""

import os
import pytz
import tempfile
from dotenv import load_dotenv

# Important for converting datetime objects (from the database)
# to timestamps. GitHub Actions runners, for example, may use a different
# system timezone, leading to different timestamps and failing tests.
TZ = pytz.timezone("Europe/Zurich")


def format_db_uri(
username: str = "postgres",
Expand Down
Loading

0 comments on commit 7da5698

Please sign in to comment.