Skip to content

Commit

Permalink
Revised script and adjusted to new input data format
Browse files Browse the repository at this point in the history
  • Loading branch information
frkunz committed Feb 2, 2016
1 parent d37e54a commit e5e37c0
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 62 deletions.
162 changes: 100 additions & 62 deletions National_Generation_Capacities.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,13 @@
},
"outputs": [],
"source": [
"import urllib.request\n",
"import csv\n",
"import pandas as pd\n",
"import numpy as np\n",
"import posixpath\n",
"import urllib.parse\n",
"import datetime \n",
"import re\n",
"import os.path\n",
"import yaml # http://pyyaml.org/, pip install pyyaml, conda install pyyaml\n",
"import json\n",
"import subprocess\n",
"from bokeh.charts import Scatter, show\n",
"from bokeh.io import output_notebook\n",
"output_notebook()\n",
"\n",
"%matplotlib inline\n",
"import logging\n",
"logger = logging.getLogger('notebook')\n",
Expand Down Expand Up @@ -86,7 +78,7 @@
},
"outputs": [],
"source": [
"data_file = '20160116_Generation_Capacity_Europe_v01__RET_FK.xlsx'\n",
"data_file = 'National_Generation_Capacities.xlsx'\n",
"filepath = 'inputs/'+data_file\n",
"data_raw = pd.read_excel(filepath,\n",
" sheetname='Summary',\n",
Expand All @@ -102,7 +94,7 @@
"data_raw.index.name='technology' \n",
"\n",
"#set multiindex column names\n",
"data_raw.columns=pd.MultiIndex.from_arrays(data_raw[:4].values, names=['country','type','year','source']) \n",
"data_raw.columns=pd.MultiIndex.from_arrays(data_raw[:5].values, names=['country','type','year','source','capacity definition']) \n",
"\n",
"#remove the header rows which are already used as column names\n",
"data_raw = data_raw[pd.notnull(data_raw.index)] \n",
Expand All @@ -126,14 +118,16 @@
"outputs": [],
"source": [
"# Reshape DataFrame to list\n",
"data = pd.DataFrame(data_raw.stack(level=['source','year','type','country']))\n",
"data = pd.DataFrame(data_raw.stack(level=['source','year','type','country','capacity definition']))\n",
"\n",
"# reset index for DataFrame\n",
"data=data.reset_index()\n",
"data['technology'] = data['technology'].str.replace('- ','')\n",
"data=data.set_index('technology')\n",
"\n",
"# delete entries with missing source\n",
"data = data[data['source'].isnull() == False]\n",
"data = data[data['source'] != 0]\n",
"\n",
"data=data.rename(columns={0: 'value'})\n",
"\n",
Expand Down Expand Up @@ -162,18 +156,25 @@
"\n",
"data.loc['Total','technology_level'] = '0'\n",
"\n",
"data.loc['Conventional fuels','technology_level'] = '1'\n",
"data.loc['Renewable fuels','technology_level'] = '1'\n",
"data.loc['Other fuels','technology_level'] = '1, 2, 3'\n",
"data.loc['Fossil fuels','technology_level'] = '1'\n",
"data.loc['Nuclear','technology_level'] = '1, 2, 3'\n",
"data.loc['Renewable energy sources','technology_level'] = '1'\n",
"data.loc['Other or unspecified energy sources','technology_level'] = '1, 2, 3'\n",
"\n",
"data.loc['Lignite','technology_level'] = '2, 3'\n",
"data.loc['Hard coal','technology_level'] = '2, 3'\n",
"data.loc['Coal derivatives','technology_level'] = '2, 3'\n",
"data.loc['Oil','technology_level'] = '2, 3'\n",
"data.loc['Natural gas','technology_level'] = '2'\n",
"data.loc['Mixed fossil fuels','technology_level'] = '2, 3'\n",
"data.loc['Differently categorized fossil fuels','technology_level'] = '2, 3'\n",
"\n",
"data.loc['Fossil fuels','technology_level'] = '2'\n",
"data.loc['Nuclear','technology_level'] = '2, 3'\n",
"data.loc['Hydro','technology_level'] = '2'\n",
"data.loc['Wind','technology_level'] = '2'\n",
"data.loc['Solar','technology_level'] = '2'\n",
"data.loc['Geothermal','technology_level'] = '2, 3'\n",
"data.loc['Tide, wave, and ocean','technology_level'] = '2, 3'\n",
"data.loc['Other renewable fuels','technology_level'] = '2'\n",
"data.loc['Bioenergy and other renewable fuels','technology_level'] = '2'\n",
"data['technology_level']\n",
"\n",
"data=data.reset_index()\n",
Expand Down Expand Up @@ -295,15 +296,18 @@
},
"outputs": [],
"source": [
"capacity_total_1 = pd.DataFrame(data[data['technology_level'].str.contains('1')].groupby(['source','year','type','country'])['value'].sum())\n",
"capacity_total_2 = pd.DataFrame(data[data['technology_level'].str.contains('2')].groupby(['source','year','type','country'])['value'].sum())\n",
"capacity_total_3 = pd.DataFrame(data[data['technology_level'].str.contains('3')].groupby(['source','year','type','country'])['value'].sum())\n",
"\n",
"capacity_total_comparison = pd.DataFrame(capacity_total_1)\n",
"capacity_total_comparison = pd.merge(capacity_total_1, capacity_total_2,left_index=True,right_index=True,how='left')\n",
"capacity_total_0 = pd.DataFrame(data[data['technology_level'].str.contains('0')].groupby(['capacity definition','source','year','type','country'])['value'].sum())\n",
"capacity_total_1 = pd.DataFrame(data[data['technology_level'].str.contains('1')].groupby(['capacity definition','source','year','type','country'])['value'].sum())\n",
"capacity_total_2 = pd.DataFrame(data[data['technology_level'].str.contains('2')].groupby(['capacity definition','source','year','type','country'])['value'].sum())\n",
"capacity_total_3 = pd.DataFrame(data[data['technology_level'].str.contains('3')].groupby(['capacity definition','source','year','type','country'])['value'].sum())\n",
"\n",
"capacity_total_comparison = pd.DataFrame(capacity_total_0)\n",
"capacity_total_comparison = pd.merge(capacity_total_0, capacity_total_1,left_index=True,right_index=True,how='left')\n",
"capacity_total_comparison = capacity_total_comparison.rename(columns={'value_x': 'technology level 0','value_y': 'technology level 1'})\n",
"capacity_total_comparison = pd.merge(capacity_total_comparison, capacity_total_2,left_index=True,right_index=True,how='left')\n",
"capacity_total_comparison = pd.merge(capacity_total_comparison, capacity_total_3,left_index=True,right_index=True,how='left')\n",
"capacity_total_comparison = capacity_total_comparison.rename(columns={'value_x': 'technology level 2','value_y': 'technology level 3'})\n",
"\n",
"capacity_total_comparison = capacity_total_comparison.rename(columns={'value_x': 'technology level 1','value_y': 'technology level 2','value': 'technology level 3'})\n",
"\n",
"capacity_total_comparison = capacity_total_comparison.sortlevel(['country','year'])\n",
"\n",
Expand All @@ -330,7 +334,7 @@
},
"outputs": [],
"source": [
"capacity_total_comparison = pd.DataFrame(capacity_total_comparison.stack()).reset_index().rename(columns={'level_4': 'technology_level',0: 'value'})\n",
"capacity_total_comparison = pd.DataFrame(capacity_total_comparison.stack()).reset_index().rename(columns={'level_5': 'technology_level',0: 'value'})\n",
"\n",
"capacity_total_pivot = pd.pivot_table(\n",
" # select specific country for comparison\n",
Expand Down Expand Up @@ -381,55 +385,89 @@
"\n",
"metadata = \"\"\"\n",
"\n",
"name: opsd-power-plants-germany\n",
"title: List of power plants in Germany.\n",
"description: This dataset contains an augmented and corrected power plant list based on the power plant list provided by the BNetzA.\n",
"name: opsd-national-generation-capacities\n",
"title: National electricity generation capacities of selected European countries.\n",
"description: This dataset comprises technology-specific aggregated generation capacities for selected European countries.\n",
"version: 0.0.1\n",
"keywords: [power plants,germany]\n",
"keywords: [national generation capacities,europe]\n",
"\n",
"resources:\n",
" path: power_plants_germany.csv\n",
" path: national_generation_capacities.csv\n",
" format: csv\n",
" mediatype: text/csv\n",
" schema: # Schema according to: http://dataprotocols.org/json-table-schema/ \n",
" fields:\n",
" - name: id\n",
" description: \n",
" type: string\n",
" description: ID for data entries \n",
" type: integer\n",
" - name: technology\n",
" description: \n",
" description: Generation technologies defined by fuel and conversion technology\n",
" type: string\n",
" - name: source\n",
" description: \n",
" description: Source of data entry\n",
" type: string\n",
" - name: year\n",
" description: \n",
" type: string\n",
" format:\n",
" description: Year \n",
" type: integer\n",
" format: YYYY\n",
" - name: type\n",
" description: \n",
" type: \n",
" format:\n",
" description: Type of capacity\n",
" type: string\n",
" - name: country\n",
" description: \n",
" type: \n",
" format:\n",
" description: Country \n",
" type: string\n",
" - name: capacity definition\n",
" description: Capacity definition used in the relevant source\n",
" type: string\n",
" - name: value\n",
" description: \n",
" description: Capacity in MW\n",
" type: float\n",
" format:\n",
"\n",
" - name: technology_level\n",
" description: Level of technology definitions (0-total aggregated capacity, 1-aggregated by type of fuel, 2-aggregated by fuel, 3-aggregated by fuel and technology)\n",
" type: integer\n",
" \n",
"licenses:\n",
" - url: http://example.com/license/url/here\n",
" name: License Name Here\n",
" version: 1.0\n",
" id: license-id-from-open\n",
"\n",
"sources:\n",
" - name: ,\n",
" web: \n",
" - name: ,\n",
" web: \n",
" - name: ENTSOE,\n",
" web: https://www.entsoe.eu/publications/system-development-reports/adequacy-forecasts/Pages/default.aspx\n",
" - name: EUROSTAT,\n",
" web: http://ec.europa.eu/energy/en/statistics/country\n",
" - name: e-control,\n",
" web: http://www.e-control.at/statistik/strom/bestandsstatistik\n",
" - name: ELIA,\n",
" web: http://www.elia.be/en/grid-data/power-generation/generating-facilities\n",
" - name: UN,\n",
" web: http://data.un.org/Data.aspx?d=EDATA&f=cmID%3AEC\n",
" - name: BFE,\n",
" web: http://www.bfe.admin.ch/themen/00526/00541/00542/00630/index.html?dossier_id=00765\n",
" - name: ERU,\n",
" web: http://www.eru.cz/en/elektrina/statistika-a-sledovani-kvality/rocni-zpravy-o-provozu\n",
" - name: BMWi,\n",
" web: http://www.bmwi.de/BMWi/Redaktion/Binaer/Energiedaten/energietraeger10-stromerzeugungskapazitaeten-bruttostromerzeugung,property=blob,bereich=bmwi2012,sprache=de,rwb=true.xls\n",
" - name: DEA,\n",
" web: http://www.ens.dk/en/info/facts-figures/energy-statistics-indicators-energy-efficiency/annual-energy-statistics\n",
" - name: REE,\n",
" web: http://www.ree.es/en/publications/statistical-data-of-spanish-electrical-system/national-indicators/\n",
" - name: RTE 2014,\n",
" web: http://www.rte-france.com/en/document/overview-electrical-energy-france-march-2014\n",
" - name: RTE 2015,\n",
" web: http://clients.rte-france.com/lang/an/visiteurs/vie/prod/parc_reference.jsp\n",
" - name: TERNA 2013,\n",
" web: http://download.terna.it/terna/0000/0216/17.XLSX\n",
" - name: TERNA 2014,\n",
" web: http://download.terna.it/terna/0000/0216/16.XLSX\n",
" - name: ILR,\n",
" web: http://www.ilr.public.lu/electricite/statistiques/index.html\n",
" - name: Tennet NL,\n",
" web: http://energieinfo.tennet.org/dataexport/exporteerdatacountry.aspx?id=InstalledCapacity\n",
" - name: CIRE,\n",
" web: http://www.rynek-energii-elektrycznej.cire.pl/st,33,207,tr,75,0,0,0,0,0,podstawowe-dane.html\n",
" \n",
"\n",
"maintainers:\n",
" - name: OPSD-Team,\n",
Expand Down Expand Up @@ -466,7 +504,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": false
},
"outputs": [],
"source": [
Expand All @@ -486,20 +524,20 @@
"data.to_excel(output_path+'national_generation_capacities.xlsx', sheet_name='output')\n",
"\n",
"#Write the information of the metadata\n",
"#with open(os.path.join(output_path, 'datapackage.json'), 'w') as f:\n",
"# f.write(datapackage_json)\n",
"with open(os.path.join(output_path, 'datapackage.json'), 'w') as f:\n",
" f.write(datapackage_json)\n",
"\n",
"#Set this string to this notebook's filename! \n",
"#nb_filename = 'National_Generation_Capacities.ipynb'\n",
"nb_filename = 'National_Generation_Capacities.ipynb'\n",
"\n",
"# Save a copy of the notebook to markdown, to serve as the package README file\n",
"#subprocess.call(['ipython', 'nbconvert', '--to', 'markdown', nb_filename])\n",
"#path_readme = os.path.join(output_path2, 'README.md')\n",
"#try:\n",
"# os.remove(path_readme)\n",
"#except Exception:\n",
"# pass\n",
"#os.rename(nb_filename.replace('.ipynb', '.md'), path_readme) \n",
"subprocess.call(['ipython', 'nbconvert', '--to', 'markdown', nb_filename])\n",
"path_readme = os.path.join(output_path2, 'README.md')\n",
"try:\n",
" os.remove(path_readme)\n",
"except Exception:\n",
" pass\n",
"os.rename(nb_filename.replace('.ipynb', '.md'), path_readme) \n",
" "
]
},
Expand Down
Binary file added inputs/National_Generation_Capacities.xlsx
Binary file not shown.

0 comments on commit e5e37c0

Please sign in to comment.