From 101fabf4b7580c347f91212f9fea4842b1bd6169 Mon Sep 17 00:00:00 2001 From: Chris Pedersen Date: Wed, 22 May 2024 16:07:42 -0500 Subject: [PATCH 1/3] add dependencies --- yahoo_fin.egg-info/requires.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yahoo_fin.egg-info/requires.txt b/yahoo_fin.egg-info/requires.txt index 59755b2..c654d4e 100644 --- a/yahoo_fin.egg-info/requires.txt +++ b/yahoo_fin.egg-info/requires.txt @@ -2,3 +2,6 @@ requests_html feedparser requests pandas +pycryptodome +beautifulsoup4==4.11.1 +bs4==0.0.2 \ No newline at end of file From baaae9d0836ad94d96543b4e4eda8d35fab22a52 Mon Sep 17 00:00:00 2001 From: Chris Pedersen Date: Wed, 22 May 2024 16:16:08 -0500 Subject: [PATCH 2/3] add fixes to _parse_json and _parse_table to account for new breaking changes from yahoo api --- yahoo_fin/stock_info.py | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/yahoo_fin/stock_info.py b/yahoo_fin/stock_info.py index f75e9b0..7b74339 100644 --- a/yahoo_fin/stock_info.py +++ b/yahoo_fin/stock_info.py @@ -13,6 +13,7 @@ from Crypto.Util.Padding import unpad # For pretty print from pprint import pp +from bs4 import BeautifulSoup try: @@ -446,37 +447,40 @@ def _parse_json(url, headers = {'User-agent': 'Mozilla/5.0'}): html = requests.get(url=url, headers = headers).text - json_str = html.split('root.App.main =')[1].split('(this)')[0].split(';\n}')[0].strip() + json_str = '{}' + + soup = BeautifulSoup(html, "html.parser") + script_tags = soup.find_all('script') + for script_tag in script_tags: + data_url = script_tag.get('data-url') + if data_url and "quoteSummary" in data_url: + if script_tag.contents is not None and len(script_tag.contents): + json_str = script_tag.contents[0] try: data = json.loads(json_str) - #print("type of json_str :", type(data)) - unencrypted_stores = _decrypt_yblob_aes(data) - json_info = unencrypted_stores['QuoteSummaryStore'] - #print("json_info :", json_info) + json_info = data + body_json = json.loads(json_info["body"]) + result = body_json["quoteSummary"]["result"][0] except: return '{}' - #else: - # return data - #new_data = json.dumps(data).replace('{}', 'null') - #new_data = re.sub(r'\{[\'|\"]raw[\'|\"]:(.*?),(.*?)\}', r'\1', new_data) - #json_info = json.loads(new_data) - #print("json info :", json_info) - return json_info + return result -def _parse_table(json_info): +def _parse_table(json_info): df = pd.DataFrame(json_info) - + if df.empty: return df - + del df["maxAge"] - df.set_index("endDate", inplace=True) + df["rawEndDate"] = df["endDate"][0]["raw"] + + df.set_index("rawEndDate", inplace=True) df.index = pd.to_datetime(df.index, unit="s") - + df = df.transpose() df.index.name = "Breakdown" From a45dae8e568263605cb34d67a30d6b6721354507 Mon Sep 17 00:00:00 2001 From: Chris Pedersen Date: Wed, 22 May 2024 16:23:47 -0500 Subject: [PATCH 3/3] return result, not return pp(result) --- yahoo_fin/stock_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yahoo_fin/stock_info.py b/yahoo_fin/stock_info.py index 7b74339..189de97 100644 --- a/yahoo_fin/stock_info.py +++ b/yahoo_fin/stock_info.py @@ -891,7 +891,7 @@ def get_earnings(ticker): result["quarterly_revenue_earnings"] = pd.DataFrame.from_dict(temp["financialsChart"]["quarterly"]) - return (pp(result)) + return result