-
Notifications
You must be signed in to change notification settings - Fork 203
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #984 from Kritika75/data-science
Added Economic Analysis using World Bank Data
- Loading branch information
Showing
4 changed files
with
196 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
import pandas as pd | ||
import requests | ||
import plotly.express as px | ||
import seaborn as sns | ||
import matplotlib.pyplot as plt | ||
from nltk.sentiment.vader import SentimentIntensityAnalyzer | ||
import nltk | ||
|
||
# Make sure the VADER lexicon used by SentimentIntensityAnalyzer is available.
# Look for a local copy first so repeated runs do not contact the NLTK
# download server when the resource is already installed.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')
|
||
# Step 1: Fetching data from World Bank API | ||
def fetch_world_bank_data(indicator, countries):
    """Fetch one World Bank indicator for the given countries, 2000-2024.

    The API paginates its results, so every page is requested and the
    observations are concatenated; otherwise only the first page of
    country/year rows would be returned.

    Args:
        indicator: World Bank indicator code, e.g. "NY.GDP.MKTP.CD".
        countries: Semicolon-separated country codes, e.g. "IN;BR;ZA".

    Returns:
        A DataFrame built with ``pd.json_normalize`` from all returned
        observations, or an empty DataFrame when the request fails or the
        API reports no data (the error is printed, not raised).
    """
    url = f"https://api.worldbank.org/v2/country/{countries}/indicator/{indicator}"
    params = {"format": "json", "date": "2000:2024", "per_page": 1000, "page": 1}
    records = []
    try:
        while True:
            # A timeout keeps the script from hanging forever on a stalled connection.
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()  # Raise an error for bad responses
            data = response.json()

            # A successful payload is [metadata, observations]; error payloads
            # have a single element and empty results carry a null second element.
            if not isinstance(data, list) or len(data) < 2 or not data[1]:
                break
            records.extend(data[1])

            metadata = data[0] if isinstance(data[0], dict) else {}
            total_pages = int(metadata.get("pages") or 1)
            if params["page"] >= total_pages:
                break
            params["page"] += 1

        if not records:
            raise ValueError(f"No data found for indicator: {indicator}")
        return pd.json_normalize(records)
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching data for {indicator}: {e}")
        return pd.DataFrame()  # Return an empty DataFrame on error
|
||
# Step 2: Data Collection
# All indicators are fetched for the same set of countries.
COUNTRY_CODES = "IN;BR;ZA"

gdp_data = fetch_world_bank_data("NY.GDP.MKTP.CD", COUNTRY_CODES)
unemployment_data = fetch_world_bank_data("SL.UEM.TOTL.ZS", COUNTRY_CODES)
# FP.CPI.TOTL is the consumer price *index* (2010 = 100); the annual
# percentage change lives under FP.CPI.TOTL.ZG, which is what an
# "Inflation Rate" column should hold.
inflation_data = fetch_world_bank_data("FP.CPI.TOTL.ZG", COUNTRY_CODES)
# Exports of goods and services (current US$). The previously used code
# "NE.EXP.GOODS.CD" returned no observations, leaving the column empty.
exports_data = fetch_world_bank_data("NE.EXP.GNFS.CD", COUNTRY_CODES)
current_account_data = fetch_world_bank_data("BN.CAB.XOKA.CD", COUNTRY_CODES)
|
||
# Step 3: Data Processing | ||
def process_data(df, value_col, rename_cols):
    """Reduce a raw indicator frame to date / country / value columns.

    Args:
        df: Frame with 'date', 'country.value' and ``value_col`` columns,
            or an empty frame when fetching failed.
        value_col: Name of the column in ``df`` holding the indicator value.
        rename_cols: Two-item sequence ``[country_name, value_name]`` giving
            the output names for 'country.value' and ``value_col``.

    Returns:
        A new DataFrame with columns ``['date', rename_cols[0],
        rename_cols[1]]`` where 'date' is parsed with ``pd.to_datetime``.
        For an empty input, an empty frame with those same columns is
        returned. The input frame is never modified.
    """
    country_name, value_name = rename_cols[0], rename_cols[1]

    if df.empty:
        print(f"No data to process for {value_name}.")
        # Use the caller's country column name so the empty frame has the
        # same schema as the non-empty result.
        return pd.DataFrame(columns=['date', country_name, value_name])

    # Work on a copy of the selected columns so the caller's frame keeps
    # its original 'date' values.
    selected = df[['date', 'country.value', value_col]].copy()
    selected['date'] = pd.to_datetime(selected['date'])
    return selected.rename(columns={'country.value': country_name, value_col: value_name})
|
||
# Normalise every raw indicator frame to (date, Country, <indicator>) columns.
gdp_data = process_data(
    gdp_data, value_col='value', rename_cols=['Country', 'GDP'],
)
unemployment_data = process_data(
    unemployment_data, value_col='value', rename_cols=['Country', 'Unemployment Rate'],
)
inflation_data = process_data(
    inflation_data, value_col='value', rename_cols=['Country', 'Inflation Rate'],
)
exports_data = process_data(
    exports_data, value_col='value', rename_cols=['Country', 'Exports'],
)
current_account_data = process_data(
    current_account_data, value_col='value', rename_cols=['Country', 'Current Account Balance'],
)
|
||
# Quick look at the GDP frame before merging
print("GDP Data:")
print(gdp_data.head())
print("NaN values in GDP Data:", gdp_data.isna().sum())

# Outer-join every indicator onto the GDP frame, keyed by date and country,
# so a row is kept even when only some indicators have a value for it.
merged_data = gdp_data
for _indicator_frame in (
    unemployment_data,
    inflation_data,
    exports_data,
    current_account_data,
):
    merged_data = merged_data.merge(_indicator_frame, on=['date', 'Country'], how='outer')

# Quick look at the combined frame
print("Merged Data:")
print(merged_data.head())
print("NaN values in Merged Data:", merged_data.isna().sum())
|
||
# Step 4: Data Visualization
def _plot_indicator(source_frame, column, title):
    """Show a per-country line chart of ``column`` from the merged data.

    ``source_frame`` is the processed frame for that indicator; when it is
    empty, a message is printed and no chart is drawn. Returns the Plotly
    figure, or None when nothing was plotted.
    """
    if source_frame.empty:
        print(f"No {column} data available for visualization.")
        return None
    fig = px.line(merged_data, x='date', y=column, color='Country', title=title)
    fig.show()
    return fig


# Every chart now reports missing data the same way.
fig_gdp = _plot_indicator(gdp_data, 'GDP', 'GDP Growth Over Time')
fig_unemployment = _plot_indicator(unemployment_data, 'Unemployment Rate', 'Unemployment Rate Over Time')
fig_inflation = _plot_indicator(inflation_data, 'Inflation Rate', 'Inflation Rate Over Time')

# Correlation Heatmap
correlation_columns = ['GDP', 'Unemployment Rate', 'Inflation Rate', 'Exports', 'Current Account Balance']
# Indicators with no observations at all cannot be correlated with anything;
# drop them so they do not show up as blank bands in the heatmap.
correlation_input = merged_data[correlation_columns].dropna(axis=1, how='all')
if not correlation_input.empty and correlation_input.shape[1] >= 2:
    correlation_matrix = correlation_input.corr()
    plt.figure(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
    plt.title('Correlation Heatmap of Economic Indicators')
    plt.show()
else:
    print("Not enough data available for correlation analysis.")
|
||
# Step 5: Sentiment Analysis on Economic News | ||
def analyze_sentiment(news_articles):
    """Score each article with VADER and collect the results in a DataFrame.

    Args:
        news_articles: Iterable of article texts.

    Returns:
        A DataFrame with one row per article, built from the dict that
        ``SentimentIntensityAnalyzer.polarity_scores`` returns for it.
    """
    analyzer = SentimentIntensityAnalyzer()
    scores = [analyzer.polarity_scores(text) for text in news_articles]
    return pd.DataFrame(scores)
|
||
# Placeholder headlines (replace with actual news data)
news_articles = [
    "India's economy is projected to grow despite global challenges.",
    "Rising unemployment rates are a cause for concern in Brazil.",
    "South Africa's inflation rate is increasing at an alarming rate.",
]

# Score the headlines and print the heading followed by the score table.
sentiment_df = analyze_sentiment(news_articles)
print("Sentiment Analysis of News Articles:", sentiment_df, sep="\n")
|
||
# Step 6: Write the merged indicator table to CSV, leaving out the row index
export_path = 'economic_analysis_data.csv'
merged_data.to_csv(export_path, index=False)
print("Data has been exported to economic_analysis_data.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Global Economic Visualization | ||
|
||
## Overview | ||
This project provides a comprehensive visualization of key economic indicators, including GDP, unemployment rates, inflation rates, exports, and current account balances for selected countries. It uses data from the World Bank API and employs various Python libraries for data manipulation and visualization. | ||
|
||
## Features | ||
- Fetches economic data for selected countries from the World Bank API. | ||
- Visualizes GDP growth, unemployment rate, and inflation rate over time. | ||
- Provides a correlation heatmap of economic indicators. | ||
- Conducts sentiment analysis on economic news articles. | ||
- Exports the merged economic data to a CSV file for further analysis. | ||
|
||
## Technologies Used | ||
- **Python**: Main programming language for data processing and visualization. | ||
- **Pandas**: Data manipulation and analysis. | ||
- **Plotly**: Interactive data visualization for graphs. | ||
- **Seaborn & Matplotlib**: Statistical data visualization. | ||
- **NLTK**: Natural Language Toolkit for sentiment analysis. | ||
- **Requests**: For making API calls to fetch economic data. | ||
|
||
## Getting Started | ||
|
||
### Prerequisites | ||
Before running the project, ensure you have the following installed: | ||
- Python 3.x | ||
- Required Python libraries (can be installed via pip): | ||
```bash | ||
pip install pandas plotly seaborn matplotlib nltk requests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
date,Country,GDP,Unemployment Rate,Inflation Rate,Exports,Current Account Balance | ||
2000-01-01,Brazil,655448231983.527,10.892,52.5315827425714,,-26530848798.8242 | ||
2000-01-01,India,468395521654.458,7.856,54.3383216485078,,-4601248652.10514 | ||
2001-01-01,Brazil,559983634798.981,10.649,56.1249316036126,,-24890077179.8191 | ||
2001-01-01,India,485440139204.171,8.039,56.3919261013738,,1410181566.42623 | ||
2002-01-01,Brazil,509795273806.715,10.641,60.8675805806435,,-9407043257.05195 | ||
2002-01-01,India,514939140318.756,8.248,58.815172903837,,7059496037.43388 | ||
2003-01-01,Brazil,558233745651.874,11.169,69.8241962003047,,2193173018.42129 | ||
2003-01-01,India,607700687237.318,8.397,61.0535954523922,,8772510263.57572 | ||
2004-01-01,Brazil,669289424806.307,10.072,74.430627668128,,8959313364.24297 | ||
2004-01-01,India,709152728830.775,8.551,63.3536380862151,,780195721.51843 | ||
2005-01-01,Brazil,891633839853.105,10.551,79.5436673306746,,11679374942.1176 | ||
2005-01-01,India,820383763511.445,8.697,66.0438512553292,,-10283543307.9807 | ||
2006-01-01,Brazil,1107626541710.74,9.692,82.8714308457339,,10773999924.4617 | ||
2006-01-01,India,940259888787.721,8.614,69.8720985315016,,-9299060317.4628 | ||
2007-01-01,Brazil,1397114486471.38,9.28,85.8890058743968,,-2753670195.51863 | ||
2007-01-01,India,1216736438834.96,8.534,74.3249644718143,,-8075694483.59495 | ||
2008-01-01,Brazil,1695855083552.05,8.268,90.7662937251916,,-35601727612.843 | ||
2008-01-01,India,1198895139005.92,8.486,80.5305542396968,,-30971987180.9349 | ||
2009-01-01,Brazil,1666996438681.46,9.419,95.202981748031,,-29328433093.6155 | ||
2009-01-01,India,1341888016994.9,8.406,89.2941733775462,,-26186435956.9107 | ||
2010-01-01,Brazil,2208837745214.23,8.424,100.0,,-86717778084.25 | ||
2010-01-01,India,1675615519484.96,8.318,100.0,,-54515877624.3661 | ||
2011-01-01,Brazil,2616156223977.47,7.578,106.636449622131,,-83576205984.365 | ||
2011-01-01,India,1823051829895.13,8.222,108.911793364834,,-62517637221.5214 | ||
2012-01-01,Brazil,2465227803011.57,7.251,112.398549260784,,-92678171336.37 | ||
2012-01-01,India,1827637590410.95,8.156,119.235538897084,,-91471245845.7231 | ||
2013-01-01,Brazil,2472819535742.73,7.071,119.372104441451,,-88384030093.09 | ||
2013-01-01,India,1856721507621.46,8.088,131.18041028234,,-49122670387.7036 | ||
2014-01-01,Brazil,2456043727198.85,6.755,126.927212865611,,-110493241349.855 | ||
2014-01-01,India,2039126479155.27,7.992,139.924446113916,,-27314281992.5894 | ||
2015-01-01,Brazil,1802212206904.68,8.538,138.388614560102,,-63408884124.215 | ||
2015-01-01,India,2103588360044.39,7.894,146.790501522574,,-22456838009.6209 | ||
2016-01-01,Brazil,1795693482652.52,11.58,150.482594206408,,-30529374570.13 | ||
2016-01-01,India,2294796885663.67,7.8,154.054013105394,,-12113787707.3688 | ||
2017-01-01,Brazil,2063514977334.32,12.792,155.668786230018,,-25337408625.13 | ||
2017-01-01,India,2651474262755.59,7.723,159.18119775209,,-38167659223.511 | ||
2018-01-01,Brazil,1916933898038.36,12.33,161.373814183906,,-54793943203.345 | ||
2018-01-01,India,2702929641648.14,7.652,165.451068899504,,-65599439052.6676 | ||
2019-01-01,Brazil,1873288205186.45,11.936,167.397860280061,,-68021678683.915 | ||
2019-01-01,India,2835606256558.84,6.51,171.621576003377,,-29762864650.3282 | ||
2020-01-01,Brazil,1476107231194.11,13.697,172.774291252888,,-28207552292.565 | ||
2020-01-01,India,2674851578586.86,7.859,182.988822584425,,32730048588.208 | ||
2021-01-01,Brazil,1670647399034.67,13.159,187.117425058296,,-46357789404.9 | ||
2021-01-01,India,3167270623260.52,6.38,192.378724699015,,-33422359357.9398 | ||
2022-01-01,Brazil,1951923942083.32,9.231,204.482120615775,,-48252882545.63 | ||
2022-01-01,India,3353470496885.95,4.822,205.266241146235,,-79050941951.324 | ||
2022-01-01,South Africa,405270850098.738,28.838,183.682744465621,,-1877973944.67041 | ||
2023-01-01,Brazil,2173665655937.27,7.95,213.87513528772,,-30828236330.145 | ||
2023-01-01,India,3549918918777.53,4.172,216.862025027426,,-30246005745.6095 | ||
2023-01-01,South Africa,377781600985.873,27.988,194.839466353907,,-6143218091.43801 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters