From 1156c0dd837fb313d82875a25b977cf6c3e3cb66 Mon Sep 17 00:00:00 2001
From: Kritika Singh
Date: Thu, 31 Oct 2024 21:20:15 +0530
Subject: [PATCH 1/2] Add files via upload

---
 .../Economic Analysis/Global_EconoViz.py      | 168 ++++++++++++++++++
 Data_Science/Economic Analysis/README.md      |  28 +++
 .../economic_analysis_data.csv                |  51 +++++
 3 files changed, 247 insertions(+)
 create mode 100644 Data_Science/Economic Analysis/Global_EconoViz.py
 create mode 100644 Data_Science/Economic Analysis/README.md
 create mode 100644 Data_Science/Economic Analysis/economic_analysis_data.csv

diff --git a/Data_Science/Economic Analysis/Global_EconoViz.py b/Data_Science/Economic Analysis/Global_EconoViz.py
new file mode 100644
index 0000000000..cf99aaae7b
--- /dev/null
+++ b/Data_Science/Economic Analysis/Global_EconoViz.py
@@ -0,0 +1,168 @@
+import pandas as pd
+import requests
+import plotly.express as px
+import seaborn as sns
+import matplotlib.pyplot as plt
+from nltk.sentiment.vader import SentimentIntensityAnalyzer
+import nltk
+
+# Download VADER lexicon for sentiment analysis
+nltk.download('vader_lexicon')
+
+# Countries covered by the analysis (World Bank codes separated by ';')
+COUNTRIES = "IN;BR;ZA"
+
+# Step 1: Fetching data from World Bank API
+def fetch_world_bank_data(indicator, countries, per_page=1000, timeout=30):
+    """Fetch one World Bank indicator for the given countries (2000-2024).
+
+    The API returns results in pages, so every page is requested and the rows
+    are combined; reading only the first page drops part of a multi-country
+    query.
+
+    Args:
+        indicator: World Bank indicator code, e.g. "NY.GDP.MKTP.CD".
+        countries: Country codes separated by ';', e.g. "IN;BR;ZA".
+        per_page: Number of rows requested per page.
+        timeout: Seconds to wait for each HTTP response.
+
+    Returns:
+        A DataFrame with one row per country and year, or an empty DataFrame
+        when the request fails or no data is returned.
+    """
+    base_url = f"https://api.worldbank.org/v2/country/{countries}/indicator/{indicator}"
+    records = []
+    page = 1
+    try:
+        while True:
+            url = f"{base_url}?format=json&date=2000:2024&per_page={per_page}&page={page}"
+            response = requests.get(url, timeout=timeout)
+            response.raise_for_status()  # Raise an error for bad responses
+            data = response.json()
+            # A valid reply is [metadata, rows]; anything else carries no rows
+            if len(data) < 2 or not data[1]:
+                break
+            records.extend(data[1])
+            if page >= int(data[0].get("pages", 1)):
+                break
+            page += 1
+        if not records:
+            raise ValueError(f"No data found for indicator: {indicator}")
+        return pd.json_normalize(records)
+    except Exception as e:
+        # Best effort: report the problem and hand back an empty frame
+        print(f"Error fetching data for {indicator}: {e}")
+        return pd.DataFrame()  # Return an empty DataFrame on error
+
+# Step 2: Data Collection
+gdp_data = fetch_world_bank_data("NY.GDP.MKTP.CD", COUNTRIES)
+unemployment_data = fetch_world_bank_data("SL.UEM.TOTL.ZS", COUNTRIES)
+# FP.CPI.TOTL.ZG is annual CPI inflation in percent (FP.CPI.TOTL is the price index)
+inflation_data = fetch_world_bank_data("FP.CPI.TOTL.ZG", COUNTRIES)
+# NE.EXP.GNFS.CD is exports of goods and services in current US$
+exports_data = fetch_world_bank_data("NE.EXP.GNFS.CD", COUNTRIES)
+current_account_data = fetch_world_bank_data("BN.CAB.XOKA.CD", COUNTRIES)
+
+# Step 3: Data Processing
+def process_data(df, value_col, rename_cols):
+    """Reduce a raw indicator frame to date, country and value columns.
+
+    Args:
+        df: Frame returned by fetch_world_bank_data (may be empty).
+        value_col: Name of the column holding the indicator value.
+        rename_cols: [country column name, value column name] for the output.
+
+    Returns:
+        A new DataFrame with columns 'date', rename_cols[0] and rename_cols[1].
+    """
+    country_name, value_name = rename_cols[0], rename_cols[1]
+    if df.empty:
+        print(f"No data to process for {value_name}.")
+        # Typed empty columns keep later merges and correlations well-behaved
+        return pd.DataFrame({
+            'date': pd.Series(dtype='datetime64[ns]'),
+            country_name: pd.Series(dtype='object'),
+            value_name: pd.Series(dtype='float64'),
+        })
+
+    # rename() returns a new frame, so the caller's frame is left untouched
+    result = df[['date', 'country.value', value_col]].rename(
+        columns={'country.value': country_name, value_col: value_name})
+    result['date'] = pd.to_datetime(result['date'])
+    return result
+
+gdp_data = process_data(gdp_data, 'value', ['Country', 'GDP'])
+unemployment_data = process_data(unemployment_data, 'value', ['Country', 'Unemployment Rate'])
+inflation_data = process_data(inflation_data, 'value', ['Country', 'Inflation Rate'])
+exports_data = process_data(exports_data, 'value', ['Country', 'Exports'])
+current_account_data = process_data(current_account_data, 'value', ['Country', 'Current Account Balance'])
+
+# Debugging checks
+print("GDP Data:")
+print(gdp_data.head())  # Check GDP data
+print("NaN values in GDP Data:", gdp_data.isnull().sum())
+
+# Merge datasets
+merged_data = gdp_data.merge(unemployment_data, on=['date', 'Country'], how='outer') \
+    .merge(inflation_data, on=['date', 'Country'], how='outer') \
+    .merge(exports_data, on=['date', 'Country'], how='outer') \
+    .merge(current_account_data, on=['date', 'Country'], how='outer')
+
+# Debugging check for merged data
+print("Merged Data:")
+print(merged_data.head())  # Check merged data
+print("NaN values in Merged Data:", merged_data.isnull().sum())
+
+# Step 4: Data Visualization
+# GDP Growth Visualization
+if not gdp_data.empty:
+    fig_gdp = px.line(merged_data, x='date', y='GDP', color='Country', title='GDP Growth Over Time')
+    fig_gdp.show()
+else:
+    print("No GDP data available for visualization.")
+
+# Unemployment Rate Visualization
+if not unemployment_data.empty:
+    fig_unemployment = px.line(merged_data, x='date', y='Unemployment Rate', color='Country', title='Unemployment Rate Over Time')
+    fig_unemployment.show()
+else:
+    print("No unemployment data available for visualization.")
+
+# Inflation Rate Visualization
+if not inflation_data.empty:
+    fig_inflation = px.line(merged_data, x='date', y='Inflation Rate', color='Country', title='Inflation Rate Over Time')
+    fig_inflation.show()
+else:
+    print("No inflation data available for visualization.")
+
+# Correlation Heatmap
+correlation_columns = ['GDP', 'Unemployment Rate', 'Inflation Rate', 'Exports', 'Current Account Balance']
+if not merged_data[correlation_columns].empty:
+    correlation_matrix = merged_data[correlation_columns].corr()
+    plt.figure(figsize=(10, 8))
+    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
+    plt.title('Correlation Heatmap of Economic Indicators')
+    plt.show()
+else:
+    print("Not enough data available for correlation analysis.")
+
+# Step 5: Sentiment Analysis on Economic News
+def analyze_sentiment(news_articles):
+    """Score each article with VADER and return one row of scores per article."""
+    sia = SentimentIntensityAnalyzer()
+    return pd.DataFrame([sia.polarity_scores(article) for article in news_articles])
+
+# Example news articles (replace with actual news data)
+news_articles = [
+    "India's economy is projected to grow despite global challenges.",
+    "Rising unemployment rates are a cause for concern in Brazil.",
+    "South Africa's inflation rate is increasing at an alarming rate."
+]
+
+sentiment_df = analyze_sentiment(news_articles)
+print("Sentiment Analysis of News Articles:")
+print(sentiment_df)
+
+# Step 6: Exporting data to CSV
+merged_data.to_csv('economic_analysis_data.csv', index=False)
+print("Data has been exported to economic_analysis_data.csv")
diff --git a/Data_Science/Economic Analysis/README.md b/Data_Science/Economic Analysis/README.md
new file mode 100644
index 0000000000..b54dd0c5e4
--- /dev/null
+++ b/Data_Science/Economic Analysis/README.md
@@ -0,0 +1,28 @@
+# Global Economic Visualization
+
+## Overview
+This project provides a comprehensive visualization of key economic indicators, including GDP, unemployment rates, inflation rates, exports, and current account balances for selected countries. It uses data from the World Bank API and employs various Python libraries for data manipulation and visualization.
+
+## Features
+- Fetches economic data for selected countries from the World Bank API.
+- Visualizes GDP growth, unemployment rate, and inflation rate over time.
+- Provides a correlation heatmap of economic indicators.
+- Conducts sentiment analysis on economic news articles.
+- Exports the merged economic data to a CSV file for further analysis.
+
+## Technologies Used
+- **Python**: Main programming language for data processing and visualization.
+- **Pandas**: Data manipulation and analysis.
+- **Plotly**: Interactive data visualization for graphs.
+- **Seaborn & Matplotlib**: Statistical data visualization.
+- **NLTK**: Natural Language Toolkit for sentiment analysis.
+- **Requests**: For making API calls to fetch economic data.
+ +## Getting Started + +### Prerequisites +Before running the project, ensure you have the following installed: +- Python 3.x +- Required Python libraries (can be installed via pip): + ```bash + pip install pandas plotly seaborn matplotlib nltk requests diff --git a/Data_Science/Economic Analysis/economic_analysis_data.csv b/Data_Science/Economic Analysis/economic_analysis_data.csv new file mode 100644 index 0000000000..8305492ef8 --- /dev/null +++ b/Data_Science/Economic Analysis/economic_analysis_data.csv @@ -0,0 +1,51 @@ +date,Country,GDP,Unemployment Rate,Inflation Rate,Exports,Current Account Balance +2000-01-01,Brazil,655448231983.527,10.892,52.5315827425714,,-26530848798.8242 +2000-01-01,India,468395521654.458,7.856,54.3383216485078,,-4601248652.10514 +2001-01-01,Brazil,559983634798.981,10.649,56.1249316036126,,-24890077179.8191 +2001-01-01,India,485440139204.171,8.039,56.3919261013738,,1410181566.42623 +2002-01-01,Brazil,509795273806.715,10.641,60.8675805806435,,-9407043257.05195 +2002-01-01,India,514939140318.756,8.248,58.815172903837,,7059496037.43388 +2003-01-01,Brazil,558233745651.874,11.169,69.8241962003047,,2193173018.42129 +2003-01-01,India,607700687237.318,8.397,61.0535954523922,,8772510263.57572 +2004-01-01,Brazil,669289424806.307,10.072,74.430627668128,,8959313364.24297 +2004-01-01,India,709152728830.775,8.551,63.3536380862151,,780195721.51843 +2005-01-01,Brazil,891633839853.105,10.551,79.5436673306746,,11679374942.1176 +2005-01-01,India,820383763511.445,8.697,66.0438512553292,,-10283543307.9807 +2006-01-01,Brazil,1107626541710.74,9.692,82.8714308457339,,10773999924.4617 +2006-01-01,India,940259888787.721,8.614,69.8720985315016,,-9299060317.4628 +2007-01-01,Brazil,1397114486471.38,9.28,85.8890058743968,,-2753670195.51863 +2007-01-01,India,1216736438834.96,8.534,74.3249644718143,,-8075694483.59495 +2008-01-01,Brazil,1695855083552.05,8.268,90.7662937251916,,-35601727612.843 
+2008-01-01,India,1198895139005.92,8.486,80.5305542396968,,-30971987180.9349 +2009-01-01,Brazil,1666996438681.46,9.419,95.202981748031,,-29328433093.6155 +2009-01-01,India,1341888016994.9,8.406,89.2941733775462,,-26186435956.9107 +2010-01-01,Brazil,2208837745214.23,8.424,100.0,,-86717778084.25 +2010-01-01,India,1675615519484.96,8.318,100.0,,-54515877624.3661 +2011-01-01,Brazil,2616156223977.47,7.578,106.636449622131,,-83576205984.365 +2011-01-01,India,1823051829895.13,8.222,108.911793364834,,-62517637221.5214 +2012-01-01,Brazil,2465227803011.57,7.251,112.398549260784,,-92678171336.37 +2012-01-01,India,1827637590410.95,8.156,119.235538897084,,-91471245845.7231 +2013-01-01,Brazil,2472819535742.73,7.071,119.372104441451,,-88384030093.09 +2013-01-01,India,1856721507621.46,8.088,131.18041028234,,-49122670387.7036 +2014-01-01,Brazil,2456043727198.85,6.755,126.927212865611,,-110493241349.855 +2014-01-01,India,2039126479155.27,7.992,139.924446113916,,-27314281992.5894 +2015-01-01,Brazil,1802212206904.68,8.538,138.388614560102,,-63408884124.215 +2015-01-01,India,2103588360044.39,7.894,146.790501522574,,-22456838009.6209 +2016-01-01,Brazil,1795693482652.52,11.58,150.482594206408,,-30529374570.13 +2016-01-01,India,2294796885663.67,7.8,154.054013105394,,-12113787707.3688 +2017-01-01,Brazil,2063514977334.32,12.792,155.668786230018,,-25337408625.13 +2017-01-01,India,2651474262755.59,7.723,159.18119775209,,-38167659223.511 +2018-01-01,Brazil,1916933898038.36,12.33,161.373814183906,,-54793943203.345 +2018-01-01,India,2702929641648.14,7.652,165.451068899504,,-65599439052.6676 +2019-01-01,Brazil,1873288205186.45,11.936,167.397860280061,,-68021678683.915 +2019-01-01,India,2835606256558.84,6.51,171.621576003377,,-29762864650.3282 +2020-01-01,Brazil,1476107231194.11,13.697,172.774291252888,,-28207552292.565 +2020-01-01,India,2674851578586.86,7.859,182.988822584425,,32730048588.208 +2021-01-01,Brazil,1670647399034.67,13.159,187.117425058296,,-46357789404.9 
+2021-01-01,India,3167270623260.52,6.38,192.378724699015,,-33422359357.9398 +2022-01-01,Brazil,1951923942083.32,9.231,204.482120615775,,-48252882545.63 +2022-01-01,India,3353470496885.95,4.822,205.266241146235,,-79050941951.324 +2022-01-01,South Africa,405270850098.738,28.838,183.682744465621,,-1877973944.67041 +2023-01-01,Brazil,2173665655937.27,7.95,213.87513528772,,-30828236330.145 +2023-01-01,India,3549918918777.53,4.172,216.862025027426,,-30246005745.6095 +2023-01-01,South Africa,377781600985.873,27.988,194.839466353907,,-6143218091.43801 From f5a5223a3c284666a6577f073218339883e66f2d Mon Sep 17 00:00:00 2001 From: Kritika75 Date: Thu, 31 Oct 2024 15:51:05 +0000 Subject: [PATCH 2/2] updating Project-Structure.md --- Project-Structure.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project-Structure.md b/Project-Structure.md index 40f2eab947..9d98517502 100644 --- a/Project-Structure.md +++ b/Project-Structure.md @@ -409,6 +409,8 @@ * [Detailed Eda](Data_Science/Diabetes%20Analysis/EDA/Detailed%20EDA.ipynb) * Model * [Logistic+Decision+Random Forest](Data_Science/Diabetes%20Analysis/Model/Logistic+Decision+Random_Forest.ipynb) + * Economic Analysis + * [Global Econoviz](Data_Science/Economic%20Analysis/Global_EconoViz.py) * Google Search Analysis With Python * [Google Search Analysis With Python](Data_Science/Google%20Search%20Analysis%20with%20python/Google_Search_Analysis_with_python.ipynb) * Hotel Demand Eda And Forecasting Using Arima Model