Skip to content

Commit

Permalink
fix bug
Browse files Browse the repository at this point in the history
  • Loading branch information
SunsetWolf committed Nov 1, 2023
1 parent 3b70cf5 commit 3af3cae
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions scripts/data_collector/yahoo/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,20 +390,26 @@ def normalize_yahoo(
return df
symbol = df.loc[df[symbol_field_name].first_valid_index(), symbol_field_name]
columns = copy.deepcopy(YahooNormalize.COLUMNS)
df = df.copy()
df.set_index(date_field_name, inplace=True)
df.index = pd.to_datetime(df.index)
df = df[~df.index.duplicated(keep="first")]
df_tmp = df.copy()
if calendar_list is not None:
df = df.reindex(
df_tmp = df_tmp.reindex(
pd.DataFrame(index=calendar_list)
.loc[
pd.Timestamp(df.index.min()).date() : pd.Timestamp(df.index.max()).date()
pd.Timestamp(df_tmp.index.min()).date() : pd.Timestamp(df_tmp.index.max()).date()
+ pd.Timedelta(hours=23, minutes=59)
]
.index
)
df.sort_index(inplace=True)
df_tmp.index = pd.to_datetime(df_tmp.index)
df_tmp.sort_index(inplace=True)
df_tmp.index = df_tmp.index.tz_localize(None)
df.index = df.index.tz_localize(None)
df_tmp['symbol'] = df.iloc[0]['symbol']
df_tmp = df_tmp.drop(columns=['open', 'high', 'low', 'close', 'volume'])
df = df_tmp.merge(df[["open", "high", "low", "close", "volume"]], left_index=True, right_index=True, how='left')
df.loc[(df["volume"] <= 0) | np.isnan(df["volume"]), list(set(df.columns) - {symbol_field_name})] = np.nan

change_series = YahooNormalize.calc_change(df, last_close)
Expand Down Expand Up @@ -536,6 +542,9 @@ def normalize(self, df: pd.DataFrame) -> pd.DataFrame:
df = super(YahooNormalize1dExtend, self).normalize(df)
df.set_index(self._date_field_name, inplace=True)
symbol_name = df[self._symbol_field_name].iloc[0]
old_symbol_list = self.old_qlib_data.index.get_level_values('instrument').unique().to_list()
if str(symbol_name).upper() not in old_symbol_list:
return df.reset_index()
old_df = self.old_qlib_data.loc[str(symbol_name).upper()]
latest_date = old_df.index[-1]
df = df.loc[latest_date:]
Expand Down

0 comments on commit 3af3cae

Please sign in to comment.