Revised script and adjusted to new input data format

Open-Power-System-Data · Feb 2, 2016 · e5e37c0 · e5e37c0
1 parent d37e54a
commit e5e37c0
Show file tree

Hide file tree

Showing 2 changed files with 100 additions and 62 deletions.
diff --git a/National_Generation_Capacities.ipynb b/National_Generation_Capacities.ipynb
@@ -42,21 +42,13 @@
    },
    "outputs": [],
    "source": [
-    "import urllib.request\n",
-    "import csv\n",
     "import pandas as pd\n",
     "import numpy as np\n",
-    "import posixpath\n",
-    "import urllib.parse\n",
-    "import datetime  \n",
-    "import re\n",
     "import os.path\n",
     "import yaml  # http://pyyaml.org/, pip install pyyaml, conda install pyyaml\n",
     "import json\n",
     "import subprocess\n",
-    "from bokeh.charts import Scatter, show\n",
-    "from bokeh.io import output_notebook\n",
-    "output_notebook()\n",
+    "\n",
     "%matplotlib inline\n",
     "import logging\n",
     "logger = logging.getLogger('notebook')\n",
@@ -86,7 +78,7 @@
    },
    "outputs": [],
    "source": [
-    "data_file = '20160116_Generation_Capacity_Europe_v01__RET_FK.xlsx'\n",
+    "data_file = 'National_Generation_Capacities.xlsx'\n",
     "filepath = 'inputs/'+data_file\n",
     "data_raw = pd.read_excel(filepath,\n",
     "                     sheetname='Summary',\n",
@@ -102,7 +94,7 @@
     "data_raw.index.name='technology' \n",
     "\n",
     "#set multiindex column names\n",
-    "data_raw.columns=pd.MultiIndex.from_arrays(data_raw[:4].values, names=['country','type','year','source']) \n",
+    "data_raw.columns=pd.MultiIndex.from_arrays(data_raw[:5].values, names=['country','type','year','source','capacity definition']) \n",
     "\n",
     "#remove 3 rows which are already used as column names\n",
     "data_raw = data_raw[pd.notnull(data_raw.index)] \n",
@@ -126,14 +118,16 @@
    "outputs": [],
    "source": [
     "# Reshape Dateframe to list\n",
-    "data = pd.DataFrame(data_raw.stack(level=['source','year','type','country']))\n",
+    "data = pd.DataFrame(data_raw.stack(level=['source','year','type','country','capacity definition']))\n",
     "\n",
     "# reset index for Dataframe\n",
     "data=data.reset_index()\n",
+    "data['technology'] = data['technology'].str.replace('- ','')\n",
     "data=data.set_index('technology')\n",
     "\n",
     "# delete entries with missing source\n",
     "data = data[data['source'].isnull() == False]\n",
+    "data = data[data['source'] != 0]\n",
     "\n",
     "data=data.rename(columns={0: 'value'})\n",
     "\n",
@@ -162,18 +156,25 @@
     "\n",
     "data.loc['Total','technology_level'] = '0'\n",
     "\n",
-    "data.loc['Conventional fuels','technology_level'] = '1'\n",
-    "data.loc['Renewable fuels','technology_level'] = '1'\n",
-    "data.loc['Other fuels','technology_level'] = '1, 2, 3'\n",
+    "data.loc['Fossil fuels','technology_level'] = '1'\n",
+    "data.loc['Nuclear','technology_level'] = '1, 2, 3'\n",
+    "data.loc['Renewable energy sources','technology_level'] = '1'\n",
+    "data.loc['Other or unspecified energy sources','technology_level'] = '1, 2, 3'\n",
+    "\n",
+    "data.loc['Lignite','technology_level'] = '2, 3'\n",
+    "data.loc['Hard coal','technology_level'] = '2, 3'\n",
+    "data.loc['Coal derivatives','technology_level'] = '2, 3'\n",
+    "data.loc['Oil','technology_level'] = '2, 3'\n",
+    "data.loc['Natural gas','technology_level'] = '2'\n",
+    "data.loc['Mixed fossil fuels','technology_level'] = '2, 3'\n",
+    "data.loc['Differently categorized fossil fuels','technology_level'] = '2, 3'\n",
     "\n",
-    "data.loc['Fossil fuels','technology_level'] = '2'\n",
-    "data.loc['Nuclear','technology_level'] = '2, 3'\n",
     "data.loc['Hydro','technology_level'] = '2'\n",
     "data.loc['Wind','technology_level'] = '2'\n",
     "data.loc['Solar','technology_level'] = '2'\n",
     "data.loc['Geothermal','technology_level'] = '2, 3'\n",
     "data.loc['Tide, wave, and ocean','technology_level'] = '2, 3'\n",
-    "data.loc['Other renewable fuels','technology_level'] = '2'\n",
+    "data.loc['Bioenergy and other renewable fuels','technology_level'] = '2'\n",
     "data['technology_level']\n",
     "\n",
     "data=data.reset_index()\n",
@@ -295,15 +296,18 @@
    },
    "outputs": [],
    "source": [
-    "capacity_total_1 = pd.DataFrame(data[data['technology_level'].str.contains('1')].groupby(['source','year','type','country'])['value'].sum())\n",
-    "capacity_total_2 = pd.DataFrame(data[data['technology_level'].str.contains('2')].groupby(['source','year','type','country'])['value'].sum())\n",
-    "capacity_total_3 = pd.DataFrame(data[data['technology_level'].str.contains('3')].groupby(['source','year','type','country'])['value'].sum())\n",
-    "\n",
-    "capacity_total_comparison = pd.DataFrame(capacity_total_1)\n",
-    "capacity_total_comparison = pd.merge(capacity_total_1, capacity_total_2,left_index=True,right_index=True,how='left')\n",
+    "capacity_total_0 = pd.DataFrame(data[data['technology_level'].str.contains('0')].groupby(['capacity definition','source','year','type','country'])['value'].sum())\n",
+    "capacity_total_1 = pd.DataFrame(data[data['technology_level'].str.contains('1')].groupby(['capacity definition','source','year','type','country'])['value'].sum())\n",
+    "capacity_total_2 = pd.DataFrame(data[data['technology_level'].str.contains('2')].groupby(['capacity definition','source','year','type','country'])['value'].sum())\n",
+    "capacity_total_3 = pd.DataFrame(data[data['technology_level'].str.contains('3')].groupby(['capacity definition','source','year','type','country'])['value'].sum())\n",
+    "\n",
+    "capacity_total_comparison = pd.DataFrame(capacity_total_0)\n",
+    "capacity_total_comparison = pd.merge(capacity_total_0, capacity_total_1,left_index=True,right_index=True,how='left')\n",
+    "capacity_total_comparison = capacity_total_comparison.rename(columns={'value_x': 'technology level 0','value_y': 'technology level 1'})\n",
+    "capacity_total_comparison = pd.merge(capacity_total_comparison, capacity_total_2,left_index=True,right_index=True,how='left')\n",
     "capacity_total_comparison = pd.merge(capacity_total_comparison, capacity_total_3,left_index=True,right_index=True,how='left')\n",
+    "capacity_total_comparison = capacity_total_comparison.rename(columns={'value_x': 'technology level 2','value_y': 'technology level 3'})\n",
     "\n",
-    "capacity_total_comparison = capacity_total_comparison.rename(columns={'value_x': 'technology level 1','value_y': 'technology level 2','value': 'technology level 3'})\n",
     "\n",
     "capacity_total_comparison = capacity_total_comparison.sortlevel(['country','year'])\n",
     "\n",
@@ -330,7 +334,7 @@
    },
    "outputs": [],
    "source": [
-    "capacity_total_comparison = pd.DataFrame(capacity_total_comparison.stack()).reset_index().rename(columns={'level_4': 'technology_level',0: 'value'})\n",
+    "capacity_total_comparison = pd.DataFrame(capacity_total_comparison.stack()).reset_index().rename(columns={'level_5': 'technology_level',0: 'value'})\n",
     "\n",
     "capacity_total_pivot = pd.pivot_table(\n",
     "                               # select specific country for comparison\n",
@@ -381,55 +385,89 @@
     "\n",
     "metadata = \"\"\"\n",
     "\n",
-    "name: opsd-power-plants-germany\n",
-    "title: List of power plants in Germany.\n",
-    "description: This dataset contains an augmented and corrected power plant list based on the power plant list provided by the BNetzA.\n",
+    "name: opsd-national-generation-capacities\n",
+    "title: National electricity generation capacities of selected European countries.\n",
+    "description: This dataset comprises technology-specific aggregated generation capacities for selected European countries.\n",
     "version: 0.0.1\n",
-    "keywords: [power plants,germany]\n",
+    "keywords: [national generation capacities,europe]\n",
     "\n",
     "resources:\n",
-    "    path: power_plants_germany.csv\n",
+    "    path: national_generation_capacities.csv\n",
     "    format: csv\n",
     "    mediatype: text/csv\n",
     "    schema:  # Schema according to: http://dataprotocols.org/json-table-schema/        \n",
     "        fields:\n",
     "            - name: id\n",
-    "              description: \n",
-    "              type: string\n",
+    "              description: ID for data entries \n",
+    "              type: integer\n",
     "            - name: technology\n",
-    "              description: \n",
+    "              description: Generation technologies defined by fuel and conversion technology\n",
     "              type: string\n",
     "            - name: source\n",
-    "              description: \n",
+    "              description: Source of data entry\n",
     "              type: string\n",
     "            - name: year\n",
-    "              description: \n",
-    "              type: string\n",
-    "              format:\n",
+    "              description: Year \n",
+    "              type: integer\n",
+    "              format: YYYY\n",
     "            - name: type\n",
-    "              description: \n",
-    "              type: \n",
-    "              format:\n",
+    "              description: Type of capacity\n",
+    "              type: string\n",
     "            - name: country\n",
-    "              description: \n",
-    "              type: \n",
-    "              format:\n",
+    "              description: Country \n",
+    "              type: string\n",
+    "            - name: capacity definition\n",
+    "              description: Capacity definition used in the relevant source\n",
+    "              type: string\n",
     "            - name: value\n",
-    "              description: \n",
+    "              description: Capacity in MW\n",
     "              type: float\n",
-    "              format:\n",
-    "\n",
+    "            - name: technology_level\n",
+    "              description: Level of technology definitions (0-total aggregated capacity, 1-aggregated by type of fuel, 2-aggregated by fuel, 3-aggregated by fuel and technology)\n",
+    "              type: integer\n",
+    "              \n",
     "licenses:\n",
     "    - url: http://example.com/license/url/here\n",
     "      name: License Name Here\n",
     "      version: 1.0\n",
     "      id: license-id-from-open\n",
     "\n",
     "sources:\n",
-    "    - name: ,\n",
-    "      web: \n",
-    "    - name: ,\n",
-    "      web: \n",
+    "    - name: ENTSOE,\n",
+    "      web: https://www.entsoe.eu/publications/system-development-reports/adequacy-forecasts/Pages/default.aspx\n",
+    "    - name: EUROSTAT,\n",
+    "      web: http://ec.europa.eu/energy/en/statistics/country\n",
+    "    - name: e-control,\n",
+    "      web: http://www.e-control.at/statistik/strom/bestandsstatistik\n",
+    "    - name: ELIA,\n",
+    "      web: http://www.elia.be/en/grid-data/power-generation/generating-facilities\n",
+    "    - name: UN,\n",
+    "      web: http://data.un.org/Data.aspx?d=EDATA&f=cmID%3AEC\n",
+    "    - name: BFE,\n",
+    "      web: http://www.bfe.admin.ch/themen/00526/00541/00542/00630/index.html?dossier_id=00765\n",
+    "    - name: ERU,\n",
+    "      web: http://www.eru.cz/en/elektrina/statistika-a-sledovani-kvality/rocni-zpravy-o-provozu\n",
+    "    - name: BMWi,\n",
+    "      web: http://www.bmwi.de/BMWi/Redaktion/Binaer/Energiedaten/energietraeger10-stromerzeugungskapazitaeten-bruttostromerzeugung,property=blob,bereich=bmwi2012,sprache=de,rwb=true.xls\n",
+    "    - name: DEA,\n",
+    "      web:  http://www.ens.dk/en/info/facts-figures/energy-statistics-indicators-energy-efficiency/annual-energy-statistics\n",
+    "    - name: REE,\n",
+    "      web: http://www.ree.es/en/publications/statistical-data-of-spanish-electrical-system/national-indicators/\n",
+    "    - name: RTE 2014,\n",
+    "      web: http://www.rte-france.com/en/document/overview-electrical-energy-france-march-2014\n",
+    "    - name: RTE 2015,\n",
+    "      web:  http://clients.rte-france.com/lang/an/visiteurs/vie/prod/parc_reference.jsp\n",
+    "    - name: TERNA 2013,\n",
+    "      web: http://download.terna.it/terna/0000/0216/17.XLSX\n",
+    "    - name: TERNA 2014,\n",
+    "      web: http://download.terna.it/terna/0000/0216/16.XLSX\n",
+    "    - name: ILR,\n",
+    "      web: http://www.ilr.public.lu/electricite/statistiques/index.html\n",
+    "    - name: Tennet NL,\n",
+    "      web: http://energieinfo.tennet.org/dataexport/exporteerdatacountry.aspx?id=InstalledCapacity\n",
+    "    - name: CIRE,\n",
+    "      web: http://www.rynek-energii-elektrycznej.cire.pl/st,33,207,tr,75,0,0,0,0,0,podstawowe-dane.html\n",
+    "      \n",
     "\n",
     "maintainers:\n",
     "    - name: OPSD-Team,\n",
@@ -466,7 +504,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": false
    },
    "outputs": [],
    "source": [
@@ -486,20 +524,20 @@
     "data.to_excel(output_path+'national_generation_capacities.xlsx', sheet_name='output')\n",
     "\n",
     "#Write the information of the metadata\n",
-    "#with open(os.path.join(output_path, 'datapackage.json'), 'w') as f:\n",
-    "#    f.write(datapackage_json)\n",
+    "with open(os.path.join(output_path, 'datapackage.json'), 'w') as f:\n",
+    "    f.write(datapackage_json)\n",
     "\n",
     "#Set this string to this notebook's filename!    \n",
-    "#nb_filename = 'National_Generation_Capacities.ipynb'\n",
+    "nb_filename = 'National_Generation_Capacities.ipynb'\n",
     "\n",
     "# Save a copy of the notebook to markdown, to serve as the package README file\n",
-    "#subprocess.call(['ipython', 'nbconvert', '--to', 'markdown', nb_filename])\n",
-    "#path_readme = os.path.join(output_path2, 'README.md')\n",
-    "#try:\n",
-    "#    os.remove(path_readme)\n",
-    "#except Exception:\n",
-    "#    pass\n",
-    "#os.rename(nb_filename.replace('.ipynb', '.md'), path_readme)    \n",
+    "subprocess.call(['ipython', 'nbconvert', '--to', 'markdown', nb_filename])\n",
+    "path_readme = os.path.join(output_path2, 'README.md')\n",
+    "try:\n",
+    "    os.remove(path_readme)\n",
+    "except Exception:\n",
+    "    pass\n",
+    "os.rename(nb_filename.replace('.ipynb', '.md'), path_readme)    \n",
     " "
    ]
   },

diff --git a/inputs/National_Generation_Capacities.xlsx b/inputs/National_Generation_Capacities.xlsx