Skip to content

Commit 7fc0550

Browse files
committed
fix: improve RawDataReader and the ReaderLogger class to better handle different environments and edge cases
1 parent 8f4bab9 commit 7fc0550

15 files changed

+168
-99
lines changed

AeroViz/plot/meteorology/hysplit.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,17 @@ def hysplit(file: Path = DEFAULT_FILE):
4040
# 創建地圖
4141
fig, ax = plt.subplots(figsize=(4, 5), subplot_kw={'projection': ccrs.PlateCarree()})
4242

43+
ax.set_global()
44+
# ax.stock_img()
45+
4346
# 設置地圖範圍
4447
ax.set_extent([116, 126, 17, 30], crs=ccrs.PlateCarree())
4548

4649
# 添加自然地理特徵
47-
ax.add_feature(cfeature.LAND)
48-
ax.add_feature(cfeature.OCEAN)
49-
ax.add_feature(cfeature.COASTLINE)
50-
ax.add_feature(cfeature.BORDERS, linestyle=':')
50+
ax.add_feature(cfeature.LAND.with_scale('10m'))
51+
ax.add_feature(cfeature.OCEAN.with_scale('10m'))
52+
ax.add_feature(cfeature.COASTLINE.with_scale('10m'))
53+
ax.add_feature(cfeature.BORDERS.with_scale('10m'), linestyle=':')
5154

5255
# 添加經緯度網格
5356
ax.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False)

AeroViz/rawDataReader/__init__.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
__all__ = ['RawDataReader']
1111

1212
SUPPORTED_INSTRUMENTS = [
13-
NEPH, Aurora, SMPS, GRIMM, APS_3321, AE33, AE43, BC1054,
13+
NEPH, Aurora, SMPS, GRIMM, APS, AE33, AE43, BC1054,
1414
MA350, TEOM, OCEC, IGAC, VOC, EPA, Minion
1515
]
1616

@@ -62,7 +62,7 @@ def RawDataReader(instrument_name: str,
6262
... instrument_name='BC1054',
6363
... path=Path('/path/to/data'),
6464
... start=datetime(2024, 2, 1),
65-
... end=datetime(2024, 7, 31, 23))
65+
... end=datetime(2024, 7, 31))
6666
"""
6767
# Mapping of instrument names to their respective classes
6868
instrument_class_map = {cls.__name__.split('.')[-1]: cls for cls in SUPPORTED_INSTRUMENTS}
@@ -71,7 +71,7 @@ def RawDataReader(instrument_name: str,
7171
if instrument_name not in meta.keys():
7272
raise ValueError(f"Instrument name '{instrument_name}' is not valid. \nMust be one of: {list(meta.keys())}")
7373

74-
# 檢查 path 是否存在且是一個目錄
74+
# Check if path exists and is a directory
7575
if not isinstance(path, Path):
7676
path = Path(path)
7777
if not path.exists() or not path.is_dir():
@@ -94,7 +94,7 @@ def RawDataReader(instrument_name: str,
9494
if end <= start:
9595
raise ValueError(f"Invalid time range: start {start} is after end {end}")
9696

97-
# 驗證 mean_freq 的格式是否正確
97+
# Verify that mean_freq has a valid format
9898
try:
9999
Timedelta(mean_freq)
100100
except ValueError:

AeroViz/rawDataReader/config/supported_instruments.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
"deter_key": {"Bins": ["all"]},
2626
},
2727

28-
"APS_3321": {
28+
"APS": {
2929
"pattern": ["*.txt"],
3030
"freq": "6min",
3131
"deter_key": {"Bins": ["all"]},

AeroViz/rawDataReader/core/__init__.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None
143143
:return: Processed DataFrame
144144
"""
145145
# Floor timestamps to the minute and remove duplicates
146-
_df = _df.groupby(_df.index.round('1min')).first()
146+
_df = _df.groupby(_df.index.floor('1min')).first()
147147

148148
# Determine frequency
149149
freq = _df.index.inferred_freq or self.meta['freq']
@@ -161,7 +161,12 @@ def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None
161161
new_index = pd.date_range(user_start or df_start, user_end or df_end, freq=freq, name='time')
162162

163163
# Reindex onto the regular time index, matching each slot to the nearest sample within tolerance
164-
return _df.reindex(new_index)
164+
if freq in ['1min', 'min', 'T']:
165+
return _df.reindex(new_index, method='nearest', tolerance='1min')
166+
elif freq in ['1h', 'h', 'H']:
167+
return _df.reindex(new_index, method='nearest', tolerance='1h')
168+
else:
169+
return _df.reindex(new_index, method='nearest', tolerance=freq)
165170

166171
def _outlier_process(self, _df):
167172
outlier_file = self.path / 'outlier.json'

AeroViz/rawDataReader/core/logger.py

+116-49
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import codecs
12
import logging
3+
import os
24
import platform
35
import re
46
import sys
@@ -10,91 +12,156 @@ def __init__(self, name: str, log_path: Path):
1012
self.name = name
1113
self.log_path = log_path
1214

13-
# ANSI color codes
14-
self.CYAN = '\033[96m'
15-
self.BLUE = '\033[94m'
16-
self.GREEN = '\033[92m'
17-
self.YELLOW = '\033[93m'
18-
self.RED = '\033[91m'
19-
self.RESET = '\033[0m'
15+
# 檢查是否支持顏色輸出
16+
self.color_support = self._check_color_support()
17+
18+
# 設置顏色代碼
19+
if self.color_support:
20+
self.CYAN = '\033[96m'
21+
self.BLUE = '\033[94m'
22+
self.GREEN = '\033[92m'
23+
self.YELLOW = '\033[93m'
24+
self.RED = '\033[91m'
25+
self.RESET = '\033[0m'
26+
else:
27+
self.CYAN = ''
28+
self.BLUE = ''
29+
self.GREEN = ''
30+
self.YELLOW = ''
31+
self.RED = ''
32+
self.RESET = ''
33+
34+
# 檢查 Unicode 支持
35+
self.unicode_support = self._setup_unicode()
36+
37+
# 設置框架字符
38+
if self.unicode_support:
39+
self.BOX_TOP_LEFT = "╔"
40+
self.BOX_TOP_RIGHT = "╗"
41+
self.BOX_BOTTOM_LEFT = "╚"
42+
self.BOX_BOTTOM_RIGHT = "╝"
43+
self.BOX_HORIZONTAL = "═"
44+
self.BOX_VERTICAL = "║"
45+
self.ARROW = "▶"
46+
else:
47+
self.BOX_TOP_LEFT = "+"
48+
self.BOX_TOP_RIGHT = "+"
49+
self.BOX_BOTTOM_LEFT = "+"
50+
self.BOX_BOTTOM_RIGHT = "+"
51+
self.BOX_HORIZONTAL = "-"
52+
self.BOX_VERTICAL = "|"
53+
self.ARROW = ">"
54+
55+
self.logger = self._setup_logger()
56+
57+
def _check_color_support(self) -> bool:
58+
"""檢查環境是否支持顏色輸出"""
59+
# 檢查是否在 Spyder 或其他 IDE 中運行
60+
if any(IDE in os.environ.get('PYTHONPATH', '') for IDE in ['spyder', 'jupyter']):
61+
return False
2062

21-
# 強制 Windows 使用 UTF-8
63+
# 檢查是否強制啟用或禁用顏色
64+
if 'FORCE_COLOR' in os.environ:
65+
return os.environ['FORCE_COLOR'].lower() in ('1', 'true', 'yes')
66+
67+
# Windows 檢查
2268
if platform.system().lower() == 'windows':
23-
try:
24-
sys.stdout.reconfigure(encoding='utf-8')
25-
self.unicode_support = True
26-
except Exception:
27-
import codecs
28-
sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer)
29-
self.unicode_support = True
30-
else:
31-
self.unicode_support = True
69+
return ('ANSICON' in os.environ or
70+
'WT_SESSION' in os.environ or # Windows Terminal
71+
'ConEmuANSI' in os.environ or
72+
os.environ.get('TERM_PROGRAM', '').lower() == 'vscode')
3273

33-
# 使用 Unicode 字符
34-
self.BOX_TOP_LEFT = "╔"
35-
self.BOX_TOP_RIGHT = "╗"
36-
self.BOX_BOTTOM_LEFT = "╚"
37-
self.BOX_BOTTOM_RIGHT = "╝"
38-
self.BOX_HORIZONTAL = "═"
39-
self.BOX_VERTICAL = "║"
40-
self.ARROW = "▶"
74+
# 其他系統檢查
75+
return hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
4176

42-
self.logger = self._setup_logger()
77+
def _setup_unicode(self) -> bool:
78+
"""設置 Unicode 支持"""
79+
if platform.system().lower() == 'windows':
80+
try:
81+
if hasattr(sys.stdout, 'reconfigure'):
82+
sys.stdout.reconfigure(encoding='utf-8')
83+
elif hasattr(sys.stdout, 'buffer'):
84+
sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer)
85+
else:
86+
return False
87+
return True
88+
except Exception:
89+
return False
90+
return True
4391

4492
def _setup_logger(self) -> logging.Logger:
93+
"""設置logger"""
4594
logger = logging.getLogger(self.name)
4695
logger.setLevel(logging.INFO)
4796

48-
# Remove existing handlers
97+
# 移除現有的 handlers
4998
for handler in logger.handlers[:]:
5099
handler.close()
51100
logger.removeHandler(handler)
52101

53-
# clean ANSI formatter (for log file)
102+
# 清理 ANSI 格式化器
54103
class CleanFormatter(logging.Formatter):
55104
def format(self, record):
56105
formatted_msg = super().format(record)
57106
return re.sub(r'\033\[[0-9;]*m', '', formatted_msg)
58107

59-
# Set up handlers with UTF-8 encoding
60-
file_handler = logging.FileHandler(self.log_path / f'{self.name}.log', encoding='utf-8')
61-
file_handler.setFormatter(CleanFormatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
62-
108+
# 設置檔案處理器
109+
try:
110+
log_dir = Path(self.log_path)
111+
log_dir.mkdir(parents=True, exist_ok=True)
112+
file_handler = logging.FileHandler(
113+
log_dir / f'{self.name}.log',
114+
encoding='utf-8',
115+
errors='replace'
116+
)
117+
file_handler.setFormatter(
118+
CleanFormatter('%(asctime)s - %(message)s',
119+
datefmt='%Y-%m-%d %H:%M:%S')
120+
)
121+
logger.addHandler(file_handler)
122+
except Exception as e:
123+
print(f"Warning: Could not set up file logging: {e}")
124+
125+
# 設置控制台處理器
63126
console_handler = logging.StreamHandler(sys.stdout)
64127
console_handler.setFormatter(logging.Formatter('%(message)s'))
65-
66-
logger.addHandler(file_handler)
67128
logger.addHandler(console_handler)
68129

69130
return logger
70131

132+
def _safe_print(self, text: str) -> str:
133+
"""安全打印,處理編碼問題"""
134+
if not self.unicode_support:
135+
text = text.encode('ascii', 'replace').decode('ascii')
136+
return text
137+
71138
def info(self, msg: str):
72-
self.logger.info(msg)
139+
self.logger.info(self._safe_print(msg))
73140

74141
def warning(self, msg: str):
75-
self.logger.warning(msg)
142+
self.logger.warning(self._safe_print(msg))
76143

77144
def error(self, msg: str):
78-
self.logger.error(msg)
145+
self.logger.error(self._safe_print(msg))
79146

80147
def info_box(self, text: str, color_part: str = None, width: int = 80):
81-
"""
82-
Create a boxed message with optional colored text
83-
84-
Args:
85-
text: Base text format (e.g., "Reading {} RAW DATA from {} to {}")
86-
color_part: Part of text to be colored (e.g., "RAW DATA")
87-
width: Box width
88-
"""
148+
"""創建帶框的消息,可選擇性地為部分文本著色"""
149+
# 處理文本
89150
display_text = text.replace(color_part, " " * len(color_part)) if color_part else text
90151

152+
# 計算padding
91153
left_padding = " " * ((width - len(display_text)) // 2)
92154
right_padding = " " * (width - len(display_text) - len(left_padding))
93155

94-
content = text.replace(color_part, f"{self.CYAN}{color_part}{self.RESET}") if color_part else text
156+
# 處理顏色
157+
if color_part and self.color_support:
158+
content = text.replace(color_part, f"{self.CYAN}{color_part}{self.RESET}")
159+
else:
160+
content = text
95161

96162
__content__ = f"{left_padding}{content}{right_padding}"
97163

98-
self.info(f"╔{'═' * width}╗")
99-
self.info(f"║{__content__}║")
100-
self.info(f"╚{'═' * width}╝")
164+
# 使用當前設置的框架字符
165+
self.info(f"{self.BOX_TOP_LEFT}{self.BOX_HORIZONTAL * width}{self.BOX_TOP_RIGHT}")
166+
self.info(f"{self.BOX_VERTICAL}{__content__}{self.BOX_VERTICAL}")
167+
self.info(f"{self.BOX_BOTTOM_LEFT}{self.BOX_HORIZONTAL * width}{self.BOX_BOTTOM_RIGHT}")

AeroViz/rawDataReader/script/AE33.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ class Reader(AbstractReader):
88

99
def _raw_reader(self, file):
1010
if file.stat().st_size / 1024 < 550:
11-
self.logger.info(f'\t {file} may not be a whole daily data. Make sure the file is correct.')
11+
self.logger.info(f'\t {file.name} may not be a whole daily data. Make sure the file is correct.')
1212

1313
_df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
1414
delimiter=r'\s+', skiprows=5, usecols=range(67))

AeroViz/rawDataReader/script/APS_3321.py AeroViz/rawDataReader/script/APS.py

+9-16
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,24 @@
55

66

77
class Reader(AbstractReader):
8-
nam = 'APS_3321'
8+
nam = 'APS'
99

1010
def _raw_reader(self, file):
1111
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
12-
_df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
13-
_key = list(_df.keys()[3:54]) ## 542 ~ 1981
12+
_df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']},
13+
date_format='%m/%d/%y %H:%M:%S').set_index('Time')
1414

15-
# create new keys
16-
_newkey = {}
17-
for _k in _key:
18-
_newkey[_k] = float(_k).__round__(4)
19-
# _newkey['Mode(m)'] = 'mode'
15+
# 542 nm ~ 1981 nm
16+
_df = _df.iloc[:, 3:54].rename(columns=lambda x: round(float(x), 4))
2017

21-
# get new dataframe
22-
_df = _df[_newkey.keys()].rename(_newkey, axis=1)
23-
# df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy()
24-
25-
_df_idx = to_datetime(_df.index, errors='coerce')
18+
_df_idx = to_datetime(_df.index, format='%m/%d/%y %H:%M:%S', errors='coerce')
2619

2720
return _df.set_index(_df_idx).loc[_df_idx.dropna()]
2821

2922
# QC data
3023
def _QC(self, _df):
24+
_df = _df.copy()
25+
3126
# mask out the data size lower than 7
3227
_df['total'] = _df.sum(axis=1, min_count=1) * (np.diff(np.log(_df.keys().to_numpy(float)))).mean()
3328
_df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill()
@@ -37,11 +32,9 @@ def _QC(self, _df):
3732
_df = _df.mask(_df['total'] > 700)
3833

3934
# not confirmed
40-
"""
41-
## remove the bin over 4000 nm which num. conc. larger than 1
35+
# remove bins above 4000 nm whose number concentration is larger than 1
4236
# _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2]>=4.]
4337

4438
# _df_1hr[_df_remv_ky] = _df_1hr[_df_remv_ky].copy().mask(_df_1hr[_df_remv_ky]>1.)
45-
# """
4639

4740
return _df[_df.keys()[:-1]]

AeroViz/rawDataReader/script/Minion.py

+2
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ def IGAC_QAQC(self,
187187
['Cl-', 'NO2-', 'NO3-', 'SO42-'],
188188
['SO42-', 'NO3-', 'NH4+'])
189189

190+
CA_range = () # CA, AC Q3=1.5 * IQR
191+
190192
_df['+_mole'] = _df[_cation].div([23, 18, 39, (24 / 2), (40 / 2)]).sum(axis=1, skipna=True)
191193
_df['-_mole'] = _df[_anion].div([35.5, 46, 62, (96 / 2)]).sum(axis=1, skipna=True)
192194

AeroViz/rawDataReader/script/OCEC.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ def _raw_reader(self, file):
7373

7474
# QC data
7575
def _QC(self, _df):
76-
MDL = {'Thermal_OC': 0.3,
77-
'Optical_OC': 0.3,
76+
MDL = {'Thermal_OC': 0.3, # 0.89
77+
'Optical_OC': 0.3, # 0.08
7878
'Thermal_EC': 0.015,
7979
'Optical_EC': 0.015
8080
}

0 commit comments

Comments
 (0)