Skip to content

Commit

Permalink
Merge pull request #984 from Kritika75/data-science
Browse files Browse the repository at this point in the history
Added Economic Analysis using World Bank Data
  • Loading branch information
UTSAVS26 authored Nov 3, 2024
2 parents bc79af7 + f5a5223 commit 79fc93b
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 0 deletions.
115 changes: 115 additions & 0 deletions Data_Science/Economic Analysis/Global_EconoViz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import pandas as pd
import requests
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

# Make sure the VADER lexicon needed by SentimentIntensityAnalyzer is available.
# Look for an installed copy first so the network is only contacted when the
# resource is actually missing (repeat runs skip the download step entirely).
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

# Step 1: Fetching data from World Bank API
def fetch_world_bank_data(indicator, countries, date_range="2000:2024", per_page=1000, timeout=30):
    """Download one World Bank indicator for a group of countries.

    The API answers in pages (50 rows per page unless ``per_page`` is given),
    so a single request silently truncates larger result sets -- e.g. three
    countries over 25 years is 75 rows. This function asks for large pages
    and keeps requesting until every page reported by the API was collected.

    Args:
        indicator: World Bank indicator code, e.g. ``"NY.GDP.MKTP.CD"``.
        countries: Semicolon-separated country codes, e.g. ``"IN;BR;ZA"``.
        date_range: ``"start:end"`` year range passed to the ``date`` query
            parameter.
        per_page: Number of rows requested per page.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A DataFrame built with ``pd.json_normalize`` from all returned rows,
        or an empty DataFrame if the request failed or produced no rows
        (the error is printed, not raised).
    """
    base_url = f"https://api.worldbank.org/v2/country/{countries}/indicator/{indicator}"
    records = []
    page = 1
    try:
        while True:
            url = f"{base_url}?format=json&date={date_range}&per_page={per_page}&page={page}"
            # A timeout keeps the script from hanging forever on a stalled connection
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()  # Raise an error for bad responses
            data = response.json()

            # A usable reply is a two-element list: [paging metadata, rows].
            # Shorter replies or a null/empty rows element carry no data.
            if not isinstance(data, list) or len(data) < 2 or not data[1]:
                break
            records.extend(data[1])

            total_pages = int(data[0].get("pages", 1))
            if page >= total_pages:
                break
            page += 1

        if not records:
            raise ValueError(f"No data found for indicator: {indicator}")
        return pd.json_normalize(records)
    except (requests.RequestException, ValueError, TypeError, AttributeError) as e:
        # Best-effort by design: callers treat an empty frame as "no data"
        print(f"Error fetching data for {indicator}: {e}")
        return pd.DataFrame()  # Return an empty DataFrame on error

# Step 2: Data Collection
# India, Brazil and South Africa, as semicolon-separated World Bank country codes
COUNTRIES = "IN;BR;ZA"

# GDP (current US$)
gdp_data = fetch_world_bank_data("NY.GDP.MKTP.CD", COUNTRIES)
# Unemployment, total (% of total labor force)
unemployment_data = fetch_world_bank_data("SL.UEM.TOTL.ZS", COUNTRIES)
# Inflation, consumer prices (annual %). The previous code "FP.CPI.TOTL" is the
# consumer price *index* (2010 = 100), which is not an inflation rate.
inflation_data = fetch_world_bank_data("FP.CPI.TOTL.ZG", COUNTRIES)
# Exports of goods and services (current US$). The previous code
# "NE.EXP.GOODS.CD" returned no rows, leaving the Exports column empty.
exports_data = fetch_world_bank_data("NE.EXP.GNFS.CD", COUNTRIES)
# Current account balance (BoP, current US$)
current_account_data = fetch_world_bank_data("BN.CAB.XOKA.CD", COUNTRIES)

# Step 3: Data Processing
def process_data(df, value_col, rename_cols):
    """Reduce a raw World Bank frame to ``date``, country and value columns.

    Args:
        df: Frame as returned by ``pd.json_normalize`` on World Bank rows; it
            must contain ``'date'``, ``'country.value'`` and ``value_col``
            unless it is empty.
        value_col: Name of the column in ``df`` holding the indicator value.
        rename_cols: Sequence whose first item is the output name for the
            country column and whose second item is the output name for the
            value column.

    Returns:
        A new DataFrame with columns ``['date', rename_cols[0],
        rename_cols[1]]`` where ``date`` is converted with ``pd.to_datetime``.
        For an empty input, an empty frame with the same columns is returned
        (``date`` typed as datetime so later merges keep a consistent key
        dtype). The input frame is never modified.
    """
    country_label, value_label = rename_cols[0], rename_cols[1]

    if df.empty:
        print(f"No data to process for {rename_cols[1]}.")
        return pd.DataFrame({
            'date': pd.Series(dtype='datetime64[ns]'),
            country_label: pd.Series(dtype='object'),
            value_label: pd.Series(dtype='float64'),
        })

    # Select + rename first so the date conversion happens on a fresh frame
    # instead of overwriting the caller's 'date' column.
    tidy = df[['date', 'country.value', value_col]].rename(
        columns={'country.value': country_label, value_col: value_label}
    )
    tidy['date'] = pd.to_datetime(tidy['date'])
    return tidy

# Normalise every raw indicator frame to (date, Country, <indicator>) columns.
# The frames are processed in the listed order and rebound to their own names.
(
    gdp_data,
    unemployment_data,
    inflation_data,
    exports_data,
    current_account_data,
) = (
    process_data(raw_frame, 'value', ['Country', indicator_label])
    for raw_frame, indicator_label in (
        (gdp_data, 'GDP'),
        (unemployment_data, 'Unemployment Rate'),
        (inflation_data, 'Inflation Rate'),
        (exports_data, 'Exports'),
        (current_account_data, 'Current Account Balance'),
    )
)

# Sanity check: preview the GDP frame and count missing values per column
print("GDP Data:")
print(gdp_data.head())
print("NaN values in GDP Data:", gdp_data.isna().sum())

# Combine all indicators into one table keyed by (date, Country).
# Outer joins keep a row even when only some indicators have a value for it.
merged_data = gdp_data
for indicator_frame in (unemployment_data, inflation_data, exports_data, current_account_data):
    merged_data = merged_data.merge(indicator_frame, on=['date', 'Country'], how='outer')

# Sanity check: preview the merged table and count missing values per column
print("Merged Data:")
print(merged_data.head())
print("NaN values in Merged Data:", merged_data.isna().sum())

# Step 4: Data Visualization
# Each chart is drawn from the merged table, one line per country, and is
# skipped with a message when its source indicator frame came back empty.

# GDP Growth Visualization
if not gdp_data.empty:
    fig_gdp = px.line(merged_data, x='date', y='GDP', color='Country', title='GDP Growth Over Time')
    fig_gdp.show()
else:
    print("No GDP data available for visualization.")

# Unemployment Rate Visualization
if not unemployment_data.empty:
    fig_unemployment = px.line(merged_data, x='date', y='Unemployment Rate', color='Country', title='Unemployment Rate Over Time')
    fig_unemployment.show()
else:
    # Report the skipped chart instead of failing silently, as the GDP chart does
    print("No unemployment data available for visualization.")

# Inflation Rate Visualization
if not inflation_data.empty:
    fig_inflation = px.line(merged_data, x='date', y='Inflation Rate', color='Country', title='Inflation Rate Over Time')
    fig_inflation.show()
else:
    print("No inflation data available for visualization.")

# Correlation Heatmap
correlation_columns = ['GDP', 'Unemployment Rate', 'Inflation Rate', 'Exports', 'Current Account Balance']
# DataFrame.empty is only True when there are no rows at all, so an indicator
# whose column is entirely NaN (e.g. a failed download) would still be plotted
# as a blank row/column. Keep only indicators that contain observations.
correlation_input = merged_data[correlation_columns].dropna(axis=1, how='all')
# A correlation needs at least two indicators and two rows carrying data
if correlation_input.shape[1] >= 2 and len(correlation_input.dropna(how='all')) >= 2:
    correlation_matrix = correlation_input.corr()
    plt.figure(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
    plt.title('Correlation Heatmap of Economic Indicators')
    plt.show()
else:
    print("Not enough data available for correlation analysis.")

# Step 5: Sentiment Analysis on Economic News
def analyze_sentiment(news_articles):
    """Score each text in ``news_articles`` with NLTK's VADER analyzer.

    Returns a DataFrame with one row per article, in input order, built from
    the dictionaries returned by ``SentimentIntensityAnalyzer.polarity_scores``.
    """
    analyzer = SentimentIntensityAnalyzer()
    scores = [analyzer.polarity_scores(text) for text in news_articles]
    return pd.DataFrame(scores)

# Placeholder headlines, one per country; swap in real news data when available
news_articles = [
    "India's economy is projected to grow despite global challenges.",
    "Rising unemployment rates are a cause for concern in Brazil.",
    "South Africa's inflation rate is increasing at an alarming rate.",
]

# Score the headlines and show the resulting table under its heading
sentiment_df = analyze_sentiment(news_articles)
print("Sentiment Analysis of News Articles:", sentiment_df, sep="\n")

# Step 6: Write the merged indicator table to disk (row index omitted)
merged_data.to_csv(path_or_buf='economic_analysis_data.csv', index=False)
print("Data has been exported to economic_analysis_data.csv")
28 changes: 28 additions & 0 deletions Data_Science/Economic Analysis/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Global Economic Visualization

## Overview
This project provides a comprehensive visualization of key economic indicators, including GDP, unemployment rates, inflation rates, exports, and current account balances for selected countries. It uses data from the World Bank API and employs various Python libraries for data manipulation and visualization.

## Features
- Fetches economic data for selected countries from the World Bank API.
- Visualizes GDP growth, unemployment rate, and inflation rate over time.
- Provides a correlation heatmap of economic indicators.
- Conducts sentiment analysis on economic news articles.
- Exports the merged economic data to a CSV file for further analysis.

## Technologies Used
- **Python**: Main programming language for data processing and visualization.
- **Pandas**: Data manipulation and analysis.
- **Plotly**: Interactive data visualization for graphs.
- **Seaborn & Matplotlib**: Statistical data visualization.
- **NLTK**: Natural Language Toolkit for sentiment analysis.
- **Requests**: For making API calls to fetch economic data.

## Getting Started

### Prerequisites
Before running the project, ensure you have the following installed:
- Python 3.x
- Required Python libraries (can be installed via pip):
  ```bash
  pip install pandas plotly seaborn matplotlib nltk requests
  ```
51 changes: 51 additions & 0 deletions Data_Science/Economic Analysis/economic_analysis_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
date,Country,GDP,Unemployment Rate,Inflation Rate,Exports,Current Account Balance
2000-01-01,Brazil,655448231983.527,10.892,52.5315827425714,,-26530848798.8242
2000-01-01,India,468395521654.458,7.856,54.3383216485078,,-4601248652.10514
2001-01-01,Brazil,559983634798.981,10.649,56.1249316036126,,-24890077179.8191
2001-01-01,India,485440139204.171,8.039,56.3919261013738,,1410181566.42623
2002-01-01,Brazil,509795273806.715,10.641,60.8675805806435,,-9407043257.05195
2002-01-01,India,514939140318.756,8.248,58.815172903837,,7059496037.43388
2003-01-01,Brazil,558233745651.874,11.169,69.8241962003047,,2193173018.42129
2003-01-01,India,607700687237.318,8.397,61.0535954523922,,8772510263.57572
2004-01-01,Brazil,669289424806.307,10.072,74.430627668128,,8959313364.24297
2004-01-01,India,709152728830.775,8.551,63.3536380862151,,780195721.51843
2005-01-01,Brazil,891633839853.105,10.551,79.5436673306746,,11679374942.1176
2005-01-01,India,820383763511.445,8.697,66.0438512553292,,-10283543307.9807
2006-01-01,Brazil,1107626541710.74,9.692,82.8714308457339,,10773999924.4617
2006-01-01,India,940259888787.721,8.614,69.8720985315016,,-9299060317.4628
2007-01-01,Brazil,1397114486471.38,9.28,85.8890058743968,,-2753670195.51863
2007-01-01,India,1216736438834.96,8.534,74.3249644718143,,-8075694483.59495
2008-01-01,Brazil,1695855083552.05,8.268,90.7662937251916,,-35601727612.843
2008-01-01,India,1198895139005.92,8.486,80.5305542396968,,-30971987180.9349
2009-01-01,Brazil,1666996438681.46,9.419,95.202981748031,,-29328433093.6155
2009-01-01,India,1341888016994.9,8.406,89.2941733775462,,-26186435956.9107
2010-01-01,Brazil,2208837745214.23,8.424,100.0,,-86717778084.25
2010-01-01,India,1675615519484.96,8.318,100.0,,-54515877624.3661
2011-01-01,Brazil,2616156223977.47,7.578,106.636449622131,,-83576205984.365
2011-01-01,India,1823051829895.13,8.222,108.911793364834,,-62517637221.5214
2012-01-01,Brazil,2465227803011.57,7.251,112.398549260784,,-92678171336.37
2012-01-01,India,1827637590410.95,8.156,119.235538897084,,-91471245845.7231
2013-01-01,Brazil,2472819535742.73,7.071,119.372104441451,,-88384030093.09
2013-01-01,India,1856721507621.46,8.088,131.18041028234,,-49122670387.7036
2014-01-01,Brazil,2456043727198.85,6.755,126.927212865611,,-110493241349.855
2014-01-01,India,2039126479155.27,7.992,139.924446113916,,-27314281992.5894
2015-01-01,Brazil,1802212206904.68,8.538,138.388614560102,,-63408884124.215
2015-01-01,India,2103588360044.39,7.894,146.790501522574,,-22456838009.6209
2016-01-01,Brazil,1795693482652.52,11.58,150.482594206408,,-30529374570.13
2016-01-01,India,2294796885663.67,7.8,154.054013105394,,-12113787707.3688
2017-01-01,Brazil,2063514977334.32,12.792,155.668786230018,,-25337408625.13
2017-01-01,India,2651474262755.59,7.723,159.18119775209,,-38167659223.511
2018-01-01,Brazil,1916933898038.36,12.33,161.373814183906,,-54793943203.345
2018-01-01,India,2702929641648.14,7.652,165.451068899504,,-65599439052.6676
2019-01-01,Brazil,1873288205186.45,11.936,167.397860280061,,-68021678683.915
2019-01-01,India,2835606256558.84,6.51,171.621576003377,,-29762864650.3282
2020-01-01,Brazil,1476107231194.11,13.697,172.774291252888,,-28207552292.565
2020-01-01,India,2674851578586.86,7.859,182.988822584425,,32730048588.208
2021-01-01,Brazil,1670647399034.67,13.159,187.117425058296,,-46357789404.9
2021-01-01,India,3167270623260.52,6.38,192.378724699015,,-33422359357.9398
2022-01-01,Brazil,1951923942083.32,9.231,204.482120615775,,-48252882545.63
2022-01-01,India,3353470496885.95,4.822,205.266241146235,,-79050941951.324
2022-01-01,South Africa,405270850098.738,28.838,183.682744465621,,-1877973944.67041
2023-01-01,Brazil,2173665655937.27,7.95,213.87513528772,,-30828236330.145
2023-01-01,India,3549918918777.53,4.172,216.862025027426,,-30246005745.6095
2023-01-01,South Africa,377781600985.873,27.988,194.839466353907,,-6143218091.43801
2 changes: 2 additions & 0 deletions Project-Structure.md
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,8 @@
* [Detailed Eda](Data_Science/Diabetes%20Analysis/EDA/Detailed%20EDA.ipynb)
* Model
* [Logistic+Decision+Random Forest](Data_Science/Diabetes%20Analysis/Model/Logistic+Decision+Random_Forest.ipynb)
* Economic Analysis
* [Global Econoviz](Data_Science/Economic%20Analysis/Global_EconoViz.py)
* Google Search Analysis With Python
* [Google Search Analysis With Python](Data_Science/Google%20Search%20Analysis%20with%20python/Google_Search_Analysis_with_python.ipynb)
* Hotel Demand Eda And Forecasting Using Arima Model
Expand Down

0 comments on commit 79fc93b

Please sign in to comment.