diff --git a/pyaerocom/data/variables.ini b/pyaerocom/data/variables.ini
index 86300f132..18455b9d2 100644
--- a/pyaerocom/data/variables.ini
+++ b/pyaerocom/data/variables.ini
@@ -2649,6 +2649,11 @@ unit = ug m-3
 description=Mass concentration of organic carbon
 unit = ug m-3
 
+# ETEX1
+
+[concch]
+description=Mass concentration of CH above ambient level
+unit = ng m-3
 
 # EMEP vars added for testing
 
diff --git a/pyaerocom/io/read_etex1.py b/pyaerocom/io/read_etex1.py
new file mode 100644
index 000000000..c3c26871a
--- /dev/null
+++ b/pyaerocom/io/read_etex1.py
@@ -0,0 +1,162 @@
+from __future__ import annotations
+
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Generator
+
+import pandas as pd
+from numpy import sign
+
+from pyaerocom.ungriddeddata import UngriddedData
+from pyaerocom.io.readungriddedbase import ReadUngriddedBase
+
+
+# Static description of the ETEX-1 data set: file locations, sample windows and quality flags
+ETEX1 = SimpleNamespace(
+    variable_name="concch",
+    long_name="PMCH concentration above ambient level",
+    units="ng/m3",
+    version="etex1_v1.1.960505",
+    stations=Path("/lustre/storeB/users/magnusu/ETEX-1/stationlist.950130"),
+    concentrations=Path("/lustre/storeB/users/magnusu/ETEX-1/pmch.dat"),
+    quality=Path("/lustre/storeB/users/magnusu/ETEX-1/pmch.cod"),
+    samples=[f"{n:02}" for n in range(30)],
+    # consecutive 3 h windows (see TS_TYPE); timedelta's first positional argument is days
+    sample_start=[
+        datetime(1994, 10, 23, 15, tzinfo=timezone.utc) + timedelta(hours=3 * n)
+        for n in range(30)
+    ],
+    sample_end=[
+        datetime(1994, 10, 23, 18, tzinfo=timezone.utc) + timedelta(hours=3 * n)
+        for n in range(30)
+    ],
+    flags={
+        0: "not sampled",
+        1: "valid sample, no tracer found",
+        11: "valid sample, tracer found",
+        21: "concentration within 2std of background variation",
+        31: "concentration given or higher",
+        41: "tracer detected, but can't be quantified",
+        10: "lost in sampling",
+        20: "lost in analysis",
+        30: "lost in shipment",
+    },
+)
+
+
+class ReadETEX1(ReadUngriddedBase):
+    """Reader for the ETEX-1 PMCH tracer observations"""
+
+    __version__ = "0.0.0"
+    _FILEMASK = None
+    DATA_ID = "etex1"
+    DEFAULT_VARS = [ETEX1.variable_name]
+    PROVIDES_VARIABLES = DEFAULT_VARS
+    SUPPORTED_DATASETS = [DATA_ID]
+    TS_TYPE = "3hourly"
+
+    # no per-file reading: read() loads everything from the fixed ETEX1 paths
+    read_file = None
+
+    def read(
+        self, vars_to_retrieve=None, files=..., first_file=None, last_file=None
+    ) -> UngriddedData:
+        """
+        read all stations into one UngriddedData object
+
+        raises ValueError if a requested variable is not provided by this dataset
+        """
+        if isinstance(vars_to_retrieve, str):
+            vars_to_retrieve = [vars_to_retrieve]
+        if vars_to_retrieve is not None and any(
+            var not in self.PROVIDES_VARIABLES for var in vars_to_retrieve
+        ):
+            # explicit exception instead of assert, which is skipped under python -O
+            raise ValueError(f"this dataset only has {self.PROVIDES_VARIABLES}")
+
+        return UngriddedData.from_station_data(list(self.__reader()))
+
+    @classmethod
+    def __reader(cls) -> Generator[dict, None, None]:
+        """yield one station data dictionary per station code"""
+        station = read_stations().set_index("cc")
+        for cc, df in read_data().groupby("cc"):
+            site = station.loc[cc]
+            conc = df.rename(columns={"end": "time"}).set_index("time").concentration
+            conc.name = cls.DEFAULT_VARS[0]
+            yield dict(
+                station_id=site.name,
+                station_name=site["Station name"],
+                latitude=site.Lat,
+                longitude=site.Long,
+                altitude=site.Alt,
+                concch=conc,
+                var_info=dict(concch=dict(units=ETEX1.units, ts_type=cls.TS_TYPE)),
+            )
+
+
+def degrees_with_minutes(x: str) -> float:
+    """
+    transform station coordinates "degree.minutes" to decimal degrees
+    """
+    degrees, minutes = x.split(".")
+    return int(degrees) + int(minutes) / 60 * sign(float(x))
+
+
+def read_stations() -> pd.DataFrame:
+    """
+    read the station list into a data frame
+    """
+    return pd.read_csv(
+        ETEX1.stations,
+        delimiter=", ",
+        header=3,
+        skipfooter=26,
+        engine="python",
+        converters=dict(Lat=degrees_with_minutes, Long=degrees_with_minutes),
+    ).astype(
+        {
+            "cc": "string",
+            "Station name": "string",
+            # float16 only resolves about 0.03 degrees for values between 32 and 64
+            "Lat": "float64",
+            "Long": "float64",
+            "Alt": "int16",
+            "WMOCode": "string",
+            "Remarks": "string",
+        }
+    )
+
+
+def read_pmch(path: Path, name: str, **kwargs) -> pd.DataFrame:
+    """
+    read a PMCH table into long format, one row per sample and station code
+    """
+    columns = ["index", "cc"] + ETEX1.samples
+    series = pd.read_csv(
+        path,
+        sep=r"\s+",
+        header=1,
+        names=columns,
+        usecols=columns[1:],
+        index_col="cc",
+        **kwargs,
+    ).unstack()
+    series.index.names = ["sample", "cc"]
+    series.name = name
+    return series.reset_index()
+
+
+def read_data() -> pd.DataFrame:
+    """
+    combine concentrations, quality flags and sample times into one data frame
+    """
+    dat = read_pmch(ETEX1.concentrations, "concentration", na_values=[-0.99, -0.88])
+    cod = read_pmch(ETEX1.quality, "quality")
+    cod["quality"] = pd.Categorical(cod.quality).rename_categories(ETEX1.flags)
+
+    time = pd.DataFrame(
+        {"sample": ETEX1.samples, "start": ETEX1.sample_start, "end": ETEX1.sample_end}
+    )
+    return pd.merge(dat, cod).merge(time, on="sample").drop("sample", axis="columns")
diff --git a/pyaerocom/io/readungridded.py b/pyaerocom/io/readungridded.py
index 8004c05c1..7ebb05096 100755
--- a/pyaerocom/io/readungridded.py
+++ b/pyaerocom/io/readungridded.py
@@ -12,34 +12,33 @@
 # ########################################################################
 
 
-import os
 import logging
+import os
 from pathlib import Path
 
-from pyaerocom.combine_vardata_ungridded import combine_vardata_ungridded
-from pyaerocom.exceptions import (DataRetrievalError,
-                                  NetworkNotImplemented, NetworkNotSupported)
-from pyaerocom.io.read_aeronet_sdav2 import ReadAeronetSdaV2
-from pyaerocom.io.read_aeronet_sdav3 import ReadAeronetSdaV3
+from pyaerocom import const, logger, print_log
+from pyaerocom.combine_vardata_ungridded import combine_vardata_ungridded
+from pyaerocom.exceptions import DataRetrievalError, NetworkNotImplemented, NetworkNotSupported
+from pyaerocom.helpers import varlist_aerocom
+from pyaerocom.io.cachehandler_ungridded import CacheHandlerUngridded
+from pyaerocom.io.read_aasetal import ReadAasEtal
 from pyaerocom.io.read_aeronet_invv2 import ReadAeronetInvV2
 from pyaerocom.io.read_aeronet_invv3 import ReadAeronetInvV3
+from pyaerocom.io.read_aeronet_sdav2 import ReadAeronetSdaV2
+from pyaerocom.io.read_aeronet_sdav3 import ReadAeronetSdaV3
 from pyaerocom.io.read_aeronet_sunv2 import ReadAeronetSunV2
 from pyaerocom.io.read_aeronet_sunv3 import ReadAeronetSunV3
+from pyaerocom.io.read_airnow import ReadAirNow
 from pyaerocom.io.read_earlinet import ReadEarlinet
 from pyaerocom.io.read_ebas import ReadEbas
-from pyaerocom.io.read_aasetal import ReadAasEtal
-from pyaerocom.io.read_gaw import ReadGAW
-from pyaerocom.io.read_ghost import ReadGhost
 from pyaerocom.io.read_eea_aqerep import ReadEEAAQEREP
 from pyaerocom.io.read_eea_aqerep_v2 import ReadEEAAQEREP_V2
-from pyaerocom.io.read_airnow import ReadAirNow
+from pyaerocom.io.read_etex1 import ReadETEX1
+from pyaerocom.io.read_gaw import ReadGAW
+from pyaerocom.io.read_ghost import ReadGhost
 from pyaerocom.io.read_marcopolo import ReadMarcoPolo
-
-from pyaerocom.io.cachehandler_ungridded import CacheHandlerUngridded
 from pyaerocom.ungriddeddata import UngriddedData
-from pyaerocom.helpers import varlist_aerocom
-from pyaerocom import const, print_log, logger
 
 
 class ReadUngridded(object):
     """Factory class for reading of ungridded data based on obsnetwork ID
@@ -62,6 +61,7 @@ class ReadUngridded(object):
         ReadAeronetSunV3,
         ReadEarlinet,
         ReadEbas,
+        ReadETEX1,
         ReadGAW,
         ReadAasEtal,
         ReadGhost,
@@ -794,4 +794,4 @@ def __str__(self):
                            data_dirs={'EBASMC' : ebas_local}
                            )
 
-    data = reader.read('EBASMC', 'ac550aer')
\ No newline at end of file
+    data = reader.read('EBASMC', 'ac550aer')
diff --git a/tests/io/test_read_etex1.py b/tests/io/test_read_etex1.py
new file mode 100644
index 000000000..e2180c9f6
--- /dev/null
+++ b/tests/io/test_read_etex1.py
@@ -0,0 +1,18 @@
+import pytest
+from pyaerocom.io.read_etex1 import ReadETEX1
+from pyaerocom.io.readungridded import ReadUngridded
+from pyaerocom.ungriddeddata import UngriddedData
+
+from ..conftest import lustre_unavail
+
+
+@lustre_unavail
+def test_read_etex1():
+    data = ReadETEX1().read()
+    assert isinstance(data, UngriddedData)
+
+
+@lustre_unavail
+def test_read_ungridded():
+    data = ReadUngridded().read("etex1", "concch")
+    assert isinstance(data, UngriddedData)